diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 000000000..d90c5db8c --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,8 @@ +# Code owners file. +# This file controls who is tagged for review for any given pull request. +# +# For syntax help see: +# https://help.github.com/en/github/creating-cloning-and-archiving-repositories/about-code-owners#codeowners-syntax + + +/samples/ @shollyman @googleapis/python-samples-owners diff --git a/docs/conf.py b/docs/conf.py index 332b81b10..251e1f4ca 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -90,7 +90,12 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. -exclude_patterns = ["_build"] +exclude_patterns = [ + "_build", + "samples/AUTHORING_GUIDE.md", + "samples/CONTRIBUTING.md", + "samples/snippets/README.rst", +] # The reST default role (used for this markup: `text`) to use for all # documents. diff --git a/samples/AUTHORING_GUIDE.md b/samples/AUTHORING_GUIDE.md new file mode 100644 index 000000000..55c97b32f --- /dev/null +++ b/samples/AUTHORING_GUIDE.md @@ -0,0 +1 @@ +See https://github.com/GoogleCloudPlatform/python-docs-samples/blob/master/AUTHORING_GUIDE.md \ No newline at end of file diff --git a/samples/CONTRIBUTING.md b/samples/CONTRIBUTING.md new file mode 100644 index 000000000..34c882b6f --- /dev/null +++ b/samples/CONTRIBUTING.md @@ -0,0 +1 @@ +See https://github.com/GoogleCloudPlatform/python-docs-samples/blob/master/CONTRIBUTING.md \ No newline at end of file diff --git a/samples/snippets/.gitignore b/samples/snippets/.gitignore new file mode 100644 index 000000000..0dc05ffad --- /dev/null +++ b/samples/snippets/.gitignore @@ -0,0 +1,2 @@ +client_secrets.json +service_account.json diff --git a/samples/snippets/README.rst b/samples/snippets/README.rst new file mode 100644 index 000000000..7c3e19e68 --- /dev/null +++ b/samples/snippets/README.rst @@ -0,0 +1,166 @@ + +.. This file is automatically generated. Do not edit this file directly. + +Google BigQuery Python Samples +=============================================================================== + +.. image:: https://gstatic.com/cloudssh/images/open-btn.png + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=bigquery/cloud-client/README.rst + + +This directory contains samples for Google BigQuery. `Google BigQuery`_ is Google's fully managed, petabyte scale, low cost analytics data warehouse. BigQuery is NoOps—there is no infrastructure to manage and you don't need a database administrator—so you can focus on analyzing data to find meaningful insights, use familiar SQL, and take advantage of our pay-as-you-go model. + + + + +.. _Google BigQuery: https://cloud.google.com/bigquery/docs + + +Setup +------------------------------------------------------------------------------- + + + +Authentication +++++++++++++++ + +This sample requires you to have authentication setup. Refer to the +`Authentication Getting Started Guide`_ for instructions on setting up +credentials for applications. + +.. _Authentication Getting Started Guide: + https://cloud.google.com/docs/authentication/getting-started + + + + +Install Dependencies +++++++++++++++++++++ + +#. Clone python-docs-samples and change directory to the sample directory you want to use. + + .. code-block:: bash + + $ git clone https://github.com/GoogleCloudPlatform/python-docs-samples.git + +#. 
Install `pip`_ and `virtualenv`_ if you do not already have them. You may want to refer to the `Python Development Environment Setup Guide`_ for Google Cloud Platform for instructions. + + .. _Python Development Environment Setup Guide: + https://cloud.google.com/python/setup + +#. Create a virtualenv. Samples are compatible with Python 3.6+. + + .. code-block:: bash + + $ virtualenv env + $ source env/bin/activate + +#. Install the dependencies needed to run the samples. + + .. code-block:: bash + + $ pip install -r requirements.txt + +.. _pip: https://pip.pypa.io/ +.. _virtualenv: https://virtualenv.pypa.io/ + + + + + + +Samples +------------------------------------------------------------------------------- + + +Quickstart ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +.. image:: https://gstatic.com/cloudssh/images/open-btn.png + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=bigquery/cloud-client/quickstart.py,bigquery/cloud-client/README.rst + + + + +To run this sample: + +.. code-block:: bash + + $ python quickstart.py + + + + +Simple Application ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +.. image:: https://gstatic.com/cloudssh/images/open-btn.png + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=bigquery/cloud-client/simple_app.py,bigquery/cloud-client/README.rst + + + + +To run this sample: + +.. code-block:: bash + + $ python simple_app.py + + + + +User Credentials ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +.. image:: https://gstatic.com/cloudssh/images/open-btn.png + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=bigquery/cloud-client/user_credentials.py,bigquery/cloud-client/README.rst + + + + +To run this sample: + +.. code-block:: bash + + $ python user_credentials.py + + + usage: user_credentials.py [-h] [--launch-browser] project + + Command-line application to run a query using user credentials. + + You must supply a client secrets file, which would normally be bundled with + your application. + + positional arguments: + project Project to use for BigQuery billing. + + optional arguments: + -h, --help show this help message and exit + --launch-browser Use a local server flow to authenticate. + + + + + + + + + +The client library +------------------------------------------------------------------------------- + +This sample uses the `Google Cloud Client Library for Python`_. +You can read the documentation for more details on API usage and use GitHub +to `browse the source`_ and `report issues`_. + +.. _Google Cloud Client Library for Python: + https://googlecloudplatform.github.io/google-cloud-python/ +.. _browse the source: + https://github.com/GoogleCloudPlatform/google-cloud-python +.. _report issues: + https://github.com/GoogleCloudPlatform/google-cloud-python/issues + + + +.. 
_Google Cloud SDK: https://cloud.google.com/sdk/ diff --git a/samples/snippets/README.rst.in b/samples/snippets/README.rst.in new file mode 100644 index 000000000..74b7fa940 --- /dev/null +++ b/samples/snippets/README.rst.in @@ -0,0 +1,31 @@ +# This file is used to generate README.rst + +product: + name: Google BigQuery + short_name: BigQuery + url: https://cloud.google.com/bigquery/docs + description: > + `Google BigQuery`_ is Google's fully managed, petabyte scale, low cost + analytics data warehouse. BigQuery is NoOps—there is no infrastructure to + manage and you don't need a database administrator—so you can focus on + analyzing data to find meaningful insights, use familiar SQL, and take + advantage of our pay-as-you-go model. + +required_role: BigQuery Admin + +setup: +- auth +- install_deps + +samples: +- name: Quickstart + file: quickstart.py +- name: Simple Application + file: simple_app.py +- name: User Credentials + file: user_credentials.py + show_help: true + +cloud_client_library: true + +folder: bigquery/cloud-client diff --git a/samples/snippets/authenticate_service_account.py b/samples/snippets/authenticate_service_account.py new file mode 100644 index 000000000..58cd2b542 --- /dev/null +++ b/samples/snippets/authenticate_service_account.py @@ -0,0 +1,39 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + + +def main(): + key_path = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS") + + # [START bigquery_client_json_credentials] + from google.cloud import bigquery + from google.oauth2 import service_account + + # TODO(developer): Set key_path to the path to the service account key + # file. + # key_path = "path/to/service_account.json" + + credentials = service_account.Credentials.from_service_account_file( + key_path, scopes=["https://www.googleapis.com/auth/cloud-platform"], + ) + + client = bigquery.Client(credentials=credentials, project=credentials.project_id,) + # [END bigquery_client_json_credentials] + return client + + +if __name__ == "__main__": + main() diff --git a/samples/snippets/authenticate_service_account_test.py b/samples/snippets/authenticate_service_account_test.py new file mode 100644 index 000000000..131c69d2c --- /dev/null +++ b/samples/snippets/authenticate_service_account_test.py @@ -0,0 +1,33 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
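+
+# Note: this test does not need a real service account key file; it
+# monkeypatches the key-file loader to return application-default credentials.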
+ +import google.auth + +import authenticate_service_account + + +def mock_credentials(*args, **kwargs): + credentials, _ = google.auth.default( + ["https://www.googleapis.com/auth/cloud-platform"] + ) + return credentials + + +def test_main(monkeypatch): + monkeypatch.setattr( + "google.oauth2.service_account.Credentials.from_service_account_file", + mock_credentials, + ) + client = authenticate_service_account.main() + assert client is not None diff --git a/samples/snippets/authorized_view_tutorial.py b/samples/snippets/authorized_view_tutorial.py new file mode 100644 index 000000000..6b5cc378f --- /dev/null +++ b/samples/snippets/authorized_view_tutorial.py @@ -0,0 +1,128 @@ +#!/usr/bin/env python + +# Copyright 2018 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def run_authorized_view_tutorial(override_values={}): + # Note to user: This is a group email for testing purposes. Replace with + # your own group email address when running this code. + analyst_group_email = "example-analyst-group@google.com" + + # [START bigquery_authorized_view_tutorial] + # Create a source dataset + # [START bigquery_avt_create_source_dataset] + from google.cloud import bigquery + + client = bigquery.Client() + source_dataset_id = "github_source_data" + + # [END bigquery_authorized_view_tutorial] + # [END bigquery_avt_create_source_dataset] + # To facilitate testing, we replace values with alternatives + # provided by the testing harness. + source_dataset_id = override_values.get("source_dataset_id", source_dataset_id) + # [START bigquery_authorized_view_tutorial] + # [START bigquery_avt_create_source_dataset] + + source_dataset = bigquery.Dataset(client.dataset(source_dataset_id)) + # Specify the geographic location where the dataset should reside. + source_dataset.location = "US" + source_dataset = client.create_dataset(source_dataset) # API request + # [END bigquery_avt_create_source_dataset] + + # Populate a source table + # [START bigquery_avt_create_source_table] + source_table_id = "github_contributors" + job_config = bigquery.QueryJobConfig() + job_config.destination = source_dataset.table(source_table_id) + sql = """ + SELECT commit, author, committer, repo_name + FROM `bigquery-public-data.github_repos.commits` + LIMIT 1000 + """ + query_job = client.query( + sql, + # Location must match that of the dataset(s) referenced in the query + # and of the destination table. + location="US", + job_config=job_config, + ) # API request - starts the query + + query_job.result() # Waits for the query to finish + # [END bigquery_avt_create_source_table] + + # Create a separate dataset to store your view + # [START bigquery_avt_create_shared_dataset] + shared_dataset_id = "shared_views" + + # [END bigquery_authorized_view_tutorial] + # [END bigquery_avt_create_shared_dataset] + # To facilitate testing, we replace values with alternatives + # provided by the testing harness. 
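+    # For example, the accompanying test passes a uuid-suffixed name such as
+    # override_values={"shared_dataset_id": "shared_views_<uuid>"} so that
+    # concurrent test runs do not collide on dataset names.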
+ shared_dataset_id = override_values.get("shared_dataset_id", shared_dataset_id) + # [START bigquery_authorized_view_tutorial] + # [START bigquery_avt_create_shared_dataset] + + shared_dataset = bigquery.Dataset(client.dataset(shared_dataset_id)) + shared_dataset.location = "US" + shared_dataset = client.create_dataset(shared_dataset) # API request + # [END bigquery_avt_create_shared_dataset] + + # Create the view in the new dataset + # [START bigquery_avt_create_view] + shared_view_id = "github_analyst_view" + view = bigquery.Table(shared_dataset.table(shared_view_id)) + sql_template = """ + SELECT + commit, author.name as author, + committer.name as committer, repo_name + FROM + `{}.{}.{}` + """ + view.view_query = sql_template.format( + client.project, source_dataset_id, source_table_id + ) + view = client.create_table(view) # API request + # [END bigquery_avt_create_view] + + # Assign access controls to the dataset containing the view + # [START bigquery_avt_shared_dataset_access] + # analyst_group_email = 'data_analysts@example.com' + access_entries = shared_dataset.access_entries + access_entries.append( + bigquery.AccessEntry("READER", "groupByEmail", analyst_group_email) + ) + shared_dataset.access_entries = access_entries + shared_dataset = client.update_dataset( + shared_dataset, ["access_entries"] + ) # API request + # [END bigquery_avt_shared_dataset_access] + + # Authorize the view to access the source dataset + # [START bigquery_avt_source_dataset_access] + access_entries = source_dataset.access_entries + access_entries.append( + bigquery.AccessEntry(None, "view", view.reference.to_api_repr()) + ) + source_dataset.access_entries = access_entries + source_dataset = client.update_dataset( + source_dataset, ["access_entries"] + ) # API request + # [END bigquery_avt_source_dataset_access] + # [END bigquery_authorized_view_tutorial] + + +if __name__ == "__main__": + run_authorized_view_tutorial() diff --git a/samples/snippets/authorized_view_tutorial_test.py b/samples/snippets/authorized_view_tutorial_test.py new file mode 100644 index 000000000..4c74020bd --- /dev/null +++ b/samples/snippets/authorized_view_tutorial_test.py @@ -0,0 +1,73 @@ +# Copyright 2018 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
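+
+# Test strategy: create uniquely named (uuid-suffixed) datasets, then let the
+# datasets_to_delete fixture drop them (and their contents) on teardown.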
+ +import uuid + +from google.cloud import bigquery +import pytest + +import authorized_view_tutorial + + +@pytest.fixture(scope="module") +def client(): + return bigquery.Client() + + +@pytest.fixture +def datasets_to_delete(client): + doomed = [] + yield doomed + for item in doomed: + client.delete_dataset(item, delete_contents=True) + + +def test_authorized_view_tutorial(client, datasets_to_delete): + override_values = { + "source_dataset_id": "github_source_data_{}".format( + str(uuid.uuid4()).replace("-", "_") + ), + "shared_dataset_id": "shared_views_{}".format( + str(uuid.uuid4()).replace("-", "_") + ), + } + source_dataset_ref = client.dataset(override_values["source_dataset_id"]) + shared_dataset_ref = client.dataset(override_values["shared_dataset_id"]) + datasets_to_delete.extend( + [override_values["source_dataset_id"], override_values["shared_dataset_id"]] + ) + + authorized_view_tutorial.run_authorized_view_tutorial(override_values) + + source_dataset = client.get_dataset(source_dataset_ref) + shared_dataset = client.get_dataset(shared_dataset_ref) + analyst_email = "example-analyst-group@google.com" + analyst_entries = [ + entry + for entry in shared_dataset.access_entries + if entry.entity_id == analyst_email + ] + assert len(analyst_entries) == 1 + assert analyst_entries[0].role == "READER" + + authorized_view_entries = [ + entry for entry in source_dataset.access_entries if entry.entity_type == "view" + ] + expected_view_ref = { + "projectId": client.project, + "datasetId": override_values["shared_dataset_id"], + "tableId": "github_analyst_view", + } + assert len(authorized_view_entries) == 1 + assert authorized_view_entries[0].entity_id == expected_view_ref diff --git a/samples/snippets/jupyter_tutorial_test.py b/samples/snippets/jupyter_tutorial_test.py new file mode 100644 index 000000000..353590b82 --- /dev/null +++ b/samples/snippets/jupyter_tutorial_test.py @@ -0,0 +1,166 @@ +# Copyright 2018 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import IPython +from IPython.terminal import interactiveshell +from IPython.testing import tools +import matplotlib +import pytest + + +# Ignore semicolon lint warning because semicolons are used in notebooks +# flake8: noqa E703 + + +@pytest.fixture(scope="session") +def ipython(): + config = tools.default_config() + config.TerminalInteractiveShell.simple_prompt = True + shell = interactiveshell.TerminalInteractiveShell.instance(config=config) + return shell + + +@pytest.fixture() +def ipython_interactive(request, ipython): + """Activate IPython's builtin hooks + + for the duration of the test scope. 
+ """ + with ipython.builtin_trap: + yield ipython + + +def _strip_region_tags(sample_text): + """Remove blank lines and region tags from sample text""" + magic_lines = [ + line for line in sample_text.split("\n") if len(line) > 0 and "# [" not in line + ] + return "\n".join(magic_lines) + + +def test_jupyter_tutorial(ipython): + matplotlib.use("agg") + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + + sample = """ + # [START bigquery_jupyter_magic_gender_by_year] + %%bigquery + SELECT + source_year AS year, + COUNT(is_male) AS birth_count + FROM `bigquery-public-data.samples.natality` + GROUP BY year + ORDER BY year DESC + LIMIT 15 + # [END bigquery_jupyter_magic_gender_by_year] + """ + result = ip.run_cell(_strip_region_tags(sample)) + result.raise_error() # Throws an exception if the cell failed. + + sample = """ + # [START bigquery_jupyter_magic_gender_by_year_var] + %%bigquery total_births + SELECT + source_year AS year, + COUNT(is_male) AS birth_count + FROM `bigquery-public-data.samples.natality` + GROUP BY year + ORDER BY year DESC + LIMIT 15 + # [END bigquery_jupyter_magic_gender_by_year_var] + """ + result = ip.run_cell(_strip_region_tags(sample)) + result.raise_error() # Throws an exception if the cell failed. + + assert "total_births" in ip.user_ns # verify that variable exists + total_births = ip.user_ns["total_births"] + # [START bigquery_jupyter_plot_births_by_year] + total_births.plot(kind="bar", x="year", y="birth_count") + # [END bigquery_jupyter_plot_births_by_year] + + sample = """ + # [START bigquery_jupyter_magic_gender_by_weekday] + %%bigquery births_by_weekday + SELECT + wday, + SUM(CASE WHEN is_male THEN 1 ELSE 0 END) AS male_births, + SUM(CASE WHEN is_male THEN 0 ELSE 1 END) AS female_births + FROM `bigquery-public-data.samples.natality` + WHERE wday IS NOT NULL + GROUP BY wday + ORDER BY wday ASC + # [END bigquery_jupyter_magic_gender_by_weekday] + """ + result = ip.run_cell(_strip_region_tags(sample)) + result.raise_error() # Throws an exception if the cell failed. 
+
+    assert "births_by_weekday" in ip.user_ns  # verify that variable exists
+    births_by_weekday = ip.user_ns["births_by_weekday"]
+    # [START bigquery_jupyter_plot_births_by_weekday]
+    births_by_weekday.plot(x="wday")
+    # [END bigquery_jupyter_plot_births_by_weekday]
+
+    # [START bigquery_jupyter_import_and_client]
+    from google.cloud import bigquery
+
+    client = bigquery.Client()
+    # [END bigquery_jupyter_import_and_client]
+
+    # [START bigquery_jupyter_query_plurality_by_year]
+    sql = """
+    SELECT
+        plurality,
+        COUNT(1) AS count,
+        year
+    FROM
+        `bigquery-public-data.samples.natality`
+    WHERE
+        NOT IS_NAN(plurality) AND plurality > 1
+    GROUP BY
+        plurality, year
+    ORDER BY
+        count DESC
+    """
+    df = client.query(sql).to_dataframe()
+    df.head()
+    # [END bigquery_jupyter_query_plurality_by_year]
+
+    # [START bigquery_jupyter_plot_plurality_by_year]
+    pivot_table = df.pivot(index="year", columns="plurality", values="count")
+    pivot_table.plot(kind="bar", stacked=True, figsize=(15, 7))
+    # [END bigquery_jupyter_plot_plurality_by_year]
+
+    # [START bigquery_jupyter_query_births_by_gestation]
+    sql = """
+    SELECT
+        gestation_weeks,
+        COUNT(1) AS count
+    FROM
+        `bigquery-public-data.samples.natality`
+    WHERE
+        NOT IS_NAN(gestation_weeks) AND gestation_weeks <> 99
+    GROUP BY
+        gestation_weeks
+    ORDER BY
+        gestation_weeks
+    """
+    df = client.query(sql).to_dataframe()
+    # [END bigquery_jupyter_query_births_by_gestation]
+
+    # [START bigquery_jupyter_plot_births_by_gestation]
+    ax = df.plot(kind="bar", x="gestation_weeks", y="count", figsize=(15, 7))
+    ax.set_title("Count of Births by Gestation Weeks")
+    ax.set_xlabel("Gestation Weeks")
+    ax.set_ylabel("Count")
+    # [END bigquery_jupyter_plot_births_by_gestation]
diff --git a/samples/snippets/natality_tutorial.py b/samples/snippets/natality_tutorial.py
new file mode 100644
index 000000000..b2b607b0d
--- /dev/null
+++ b/samples/snippets/natality_tutorial.py
@@ -0,0 +1,84 @@
+#!/usr/bin/env python
+
+# Copyright 2018 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def run_natality_tutorial(override_values={}):
+    # [START bigquery_query_natality_tutorial]
+    """Create a Google BigQuery linear regression input table.
+
+    In the code below, the following actions are taken:
+    * A new dataset, "natality_regression", is created.
+    * A query is run against the public dataset,
+      bigquery-public-data.samples.natality, selecting only the data of
+      interest to the regression, the output of which is stored in a new
+      "regression_input" table.
+    """
+
+    from google.cloud import bigquery
+
+    # Create a new Google BigQuery client using Google Cloud Platform project
+    # defaults.
+    client = bigquery.Client()
+
+    # Prepare a reference to a new dataset for storing the query results.
+ dataset_id = "natality_regression" + # [END bigquery_query_natality_tutorial] + # To facilitate testing, we replace values with alternatives + # provided by the testing harness. + dataset_id = override_values.get("dataset_id", dataset_id) + # [START bigquery_query_natality_tutorial] + + dataset = bigquery.Dataset(client.dataset(dataset_id)) + + # Create the new BigQuery dataset. + dataset = client.create_dataset(dataset) + + # In the new BigQuery dataset, create a reference to a new table for + # storing the query results. + table_ref = dataset.table("regression_input") + + # Configure the query job. + job_config = bigquery.QueryJobConfig() + + # Set the destination table to the table reference created above. + job_config.destination = table_ref + + # Set up a query in Standard SQL, which is the default for the BigQuery + # Python client library. + # The query selects the fields of interest. + query = """ + SELECT + weight_pounds, mother_age, father_age, gestation_weeks, + weight_gain_pounds, apgar_5min + FROM + `bigquery-public-data.samples.natality` + WHERE + weight_pounds IS NOT NULL + AND mother_age IS NOT NULL + AND father_age IS NOT NULL + AND gestation_weeks IS NOT NULL + AND weight_gain_pounds IS NOT NULL + AND apgar_5min IS NOT NULL + """ + + # Run the query. + query_job = client.query(query, job_config=job_config) + query_job.result() # Waits for the query to finish + # [END bigquery_query_natality_tutorial] + + +if __name__ == "__main__": + run_natality_tutorial() diff --git a/samples/snippets/natality_tutorial_test.py b/samples/snippets/natality_tutorial_test.py new file mode 100644 index 000000000..fae72fa46 --- /dev/null +++ b/samples/snippets/natality_tutorial_test.py @@ -0,0 +1,50 @@ +# Copyright 2018 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import uuid + +from google.cloud import bigquery +import pytest + +import natality_tutorial + + +@pytest.fixture(scope="module") +def client(): + return bigquery.Client() + + +@pytest.fixture +def datasets_to_delete(client): + doomed = [] + yield doomed + for item in doomed: + client.delete_dataset(item, delete_contents=True) + + +def test_natality_tutorial(client, datasets_to_delete): + override_values = { + "dataset_id": "natality_regression_{}".format( + str(uuid.uuid4()).replace("-", "_") + ), + } + datasets_to_delete.append(override_values["dataset_id"]) + + natality_tutorial.run_natality_tutorial(override_values) + + table_ref = bigquery.Dataset(client.dataset(override_values["dataset_id"])).table( + "regression_input" + ) + table = client.get_table(table_ref) + assert table.num_rows > 0 diff --git a/samples/snippets/noxfile.py b/samples/snippets/noxfile.py new file mode 100644 index 000000000..5660f08be --- /dev/null +++ b/samples/snippets/noxfile.py @@ -0,0 +1,222 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import os
+from pathlib import Path
+import sys
+
+import nox
+
+
+# WARNING - WARNING - WARNING - WARNING - WARNING
+# WARNING - WARNING - WARNING - WARNING - WARNING
+# DO NOT EDIT THIS FILE EVER!
+# WARNING - WARNING - WARNING - WARNING - WARNING
+# WARNING - WARNING - WARNING - WARNING - WARNING
+
+# Copy `noxfile_config.py` to your directory and modify it instead.
+
+
+# `TEST_CONFIG` dict is a configuration hook that allows users to
+# modify the test configurations. The values here should be in sync
+# with `noxfile_config.py`. Users will copy `noxfile_config.py` into
+# their directory and modify it.
+
+TEST_CONFIG = {
+    # You can opt out from the test for specific Python versions.
+    "ignored_versions": ["2.7"],
+    # An envvar key for determining the project id to use. Change it
+    # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a
+    # build specific Cloud project. You can also use your own string
+    # to use your own Cloud project.
+    "gcloud_project_env": "GOOGLE_CLOUD_PROJECT",
+    # 'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT',
+    # A dictionary you want to inject into your test. Don't put any
+    # secrets here. These values will override predefined values.
+    "envs": {},
+}
+
+
+try:
+    # Ensure we can import noxfile_config in the project's directory.
+    sys.path.append(".")
+    from noxfile_config import TEST_CONFIG_OVERRIDE
+except ImportError as e:
+    print("No user noxfile_config found: detail: {}".format(e))
+    TEST_CONFIG_OVERRIDE = {}
+
+# Update the TEST_CONFIG with the user supplied values.
+TEST_CONFIG.update(TEST_CONFIG_OVERRIDE)
+
+
+def get_pytest_env_vars():
+    """Returns a dict for pytest invocation."""
+    ret = {}
+
+    # Override the GCLOUD_PROJECT and the alias.
+    env_key = TEST_CONFIG["gcloud_project_env"]
+    # This should error out if not set.
+    ret["GOOGLE_CLOUD_PROJECT"] = os.environ[env_key]
+
+    # Apply user supplied envs.
+    ret.update(TEST_CONFIG["envs"])
+    return ret
+
+
+# DO NOT EDIT - automatically generated.
+# All versions used to test samples.
+ALL_VERSIONS = ["2.7", "3.6", "3.7", "3.8"]
+
+# Any default versions that should be ignored.
+IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"]
+
+TESTED_VERSIONS = sorted([v for v in ALL_VERSIONS if v not in IGNORED_VERSIONS])
+
+INSTALL_LIBRARY_FROM_SOURCE = bool(os.environ.get("INSTALL_LIBRARY_FROM_SOURCE", False))
+#
+# Style Checks
+#
+
+
+def _determine_local_import_names(start_dir):
+    """Determines all import names that should be considered "local".
+
+    This is used when running the linter to ensure that import order is
+    properly checked.
+    """
+    file_ext_pairs = [os.path.splitext(path) for path in os.listdir(start_dir)]
+    return [
+        basename
+        for basename, extension in file_ext_pairs
+        if extension == ".py"
+        or os.path.isdir(os.path.join(start_dir, basename))
+        and basename not in ("__pycache__",)
+    ]
+
+
+# Linting with flake8.
+# +# We ignore the following rules: +# E203: whitespace before ‘:’ +# E266: too many leading ‘#’ for block comment +# E501: line too long +# I202: Additional newline in a section of imports +# +# We also need to specify the rules which are ignored by default: +# ['E226', 'W504', 'E126', 'E123', 'W503', 'E24', 'E704', 'E121'] +FLAKE8_COMMON_ARGS = [ + "--show-source", + "--builtin=gettext", + "--max-complexity=20", + "--import-order-style=google", + "--exclude=.nox,.cache,env,lib,generated_pb2,*_pb2.py,*_pb2_grpc.py", + "--ignore=E121,E123,E126,E203,E226,E24,E266,E501,E704,W503,W504,I202", + "--max-line-length=88", +] + + +@nox.session +def lint(session): + session.install("flake8", "flake8-import-order") + + local_names = _determine_local_import_names(".") + args = FLAKE8_COMMON_ARGS + [ + "--application-import-names", + ",".join(local_names), + ".", + ] + session.run("flake8", *args) + + +# +# Sample Tests +# + + +PYTEST_COMMON_ARGS = ["--junitxml=sponge_log.xml"] + + +def _session_tests(session, post_install=None): + """Runs py.test for a particular project.""" + if os.path.exists("requirements.txt"): + session.install("-r", "requirements.txt") + + if os.path.exists("requirements-test.txt"): + session.install("-r", "requirements-test.txt") + + if INSTALL_LIBRARY_FROM_SOURCE: + session.install("-e", _get_repo_root()) + + if post_install: + post_install(session) + + session.run( + "pytest", + *(PYTEST_COMMON_ARGS + session.posargs), + # Pytest will return 5 when no tests are collected. This can happen + # on travis where slow and flaky tests are excluded. + # See http://doc.pytest.org/en/latest/_modules/_pytest/main.html + success_codes=[0, 5], + env=get_pytest_env_vars() + ) + + +@nox.session(python=ALL_VERSIONS) +def py(session): + """Runs py.test for a sample using the specified version of Python.""" + if session.python in TESTED_VERSIONS: + _session_tests(session) + else: + session.skip( + "SKIPPED: {} tests are disabled for this sample.".format(session.python) + ) + + +# +# Readmegen +# + + +def _get_repo_root(): + """ Returns the root folder of the project. """ + # Get root of this repository. Assume we don't have directories nested deeper than 10 items. + p = Path(os.getcwd()) + for i in range(10): + if p is None: + break + if Path(p / ".git").exists(): + return str(p) + p = p.parent + raise Exception("Unable to detect repository root.") + + +GENERATED_READMES = sorted([x for x in Path(".").rglob("*.rst.in")]) + + +@nox.session +@nox.parametrize("path", GENERATED_READMES) +def readmegen(session, path): + """(Re-)generates the readme for a sample.""" + session.install("jinja2", "pyyaml") + dir_ = os.path.dirname(path) + + if os.path.exists(os.path.join(dir_, "requirements.txt")): + session.install("-r", os.path.join(dir_, "requirements.txt")) + + in_file = os.path.join(dir_, "README.rst.in") + session.run( + "python", _get_repo_root() + "/scripts/readme-gen/readme_gen.py", in_file + ) diff --git a/samples/snippets/noxfile_config.py b/samples/snippets/noxfile_config.py new file mode 100644 index 000000000..b1f495f01 --- /dev/null +++ b/samples/snippets/noxfile_config.py @@ -0,0 +1,35 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Default TEST_CONFIG_OVERRIDE for python repos.
+
+# You can copy this file into your directory, then it will be imported from
+# the noxfile.py.
+
+# The source of truth:
+# https://github.com/GoogleCloudPlatform/python-docs-samples/blob/master/noxfile_config.py
+
+TEST_CONFIG_OVERRIDE = {
+    # You can opt out from the test for specific Python versions.
+    "ignored_versions": ["2.7"],
+    # An envvar key for determining the project id to use. Change it
+    # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a
+    # build specific Cloud project. You can also use your own string
+    # to use your own Cloud project.
+    "gcloud_project_env": "GOOGLE_CLOUD_PROJECT",
+    # "gcloud_project_env": "BUILD_SPECIFIC_GCLOUD_PROJECT",
+    # A dictionary you want to inject into your test. Don't put any
+    # secrets here. These values will override predefined values.
+    "envs": {},
+}
diff --git a/samples/snippets/quickstart.py b/samples/snippets/quickstart.py
new file mode 100644
index 000000000..56d6fd843
--- /dev/null
+++ b/samples/snippets/quickstart.py
@@ -0,0 +1,47 @@
+#!/usr/bin/env python
+
+# Copyright 2016 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def run_quickstart(override_values={}):
+    # [START bigquery_quickstart]
+    # Imports the Google Cloud client library
+    from google.cloud import bigquery
+
+    # Instantiates a client
+    bigquery_client = bigquery.Client()
+
+    # The name for the new dataset
+    dataset_id = "my_new_dataset"
+
+    # [END bigquery_quickstart]
+    # To facilitate testing, we replace values with alternatives
+    # provided by the testing harness.
+    dataset_id = override_values.get("dataset_id", dataset_id)
+    # [START bigquery_quickstart]
+
+    # Prepares a reference to the new dataset
+    dataset_ref = bigquery_client.dataset(dataset_id)
+    dataset = bigquery.Dataset(dataset_ref)
+
+    # Creates the new dataset
+    dataset = bigquery_client.create_dataset(dataset)
+
+    print("Dataset {} created.".format(dataset.dataset_id))
+    # [END bigquery_quickstart]
+
+
+if __name__ == "__main__":
+    run_quickstart()
diff --git a/samples/snippets/quickstart_test.py b/samples/snippets/quickstart_test.py
new file mode 100644
index 000000000..a5e3a13e3
--- /dev/null
+++ b/samples/snippets/quickstart_test.py
@@ -0,0 +1,44 @@
+# Copyright 2016 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import uuid
+
+from google.cloud import bigquery
+import pytest
+
+import quickstart
+
+
+@pytest.fixture(scope="module")
+def client():
+    return bigquery.Client()
+
+
+@pytest.fixture
+def datasets_to_delete(client):
+    doomed = []
+    yield doomed
+    for item in doomed:
+        client.delete_dataset(item, delete_contents=True)
+
+
+def test_quickstart(capsys, client, datasets_to_delete):
+    override_values = {
+        "dataset_id": "my_new_dataset_{}".format(str(uuid.uuid4()).replace("-", "_")),
+    }
+    datasets_to_delete.append(override_values["dataset_id"])
+
+    quickstart.run_quickstart(override_values)
+    out, _ = capsys.readouterr()
+    assert override_values["dataset_id"] in out
diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt
new file mode 100644
index 000000000..41c4d5110
--- /dev/null
+++ b/samples/snippets/requirements-test.txt
@@ -0,0 +1,2 @@
+pytest==5.3.2
+mock==3.0.5
diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt
new file mode 100644
index 000000000..6339b3047
--- /dev/null
+++ b/samples/snippets/requirements.txt
@@ -0,0 +1,5 @@
+google-cloud-bigquery[pandas,bqstorage,pyarrow]==1.25.0
+google-auth-oauthlib==0.4.1
+ipython==7.15.0
+matplotlib==3.2.2
+pytz==2020.1
diff --git a/samples/snippets/simple_app.py b/samples/snippets/simple_app.py
new file mode 100644
index 000000000..c21ae86f4
--- /dev/null
+++ b/samples/snippets/simple_app.py
@@ -0,0 +1,56 @@
+#!/usr/bin/env python
+
+# Copyright 2016 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Simple application that performs a query with BigQuery."""
+# [START bigquery_simple_app_all]
+# [START bigquery_simple_app_deps]
+from google.cloud import bigquery

+# [END bigquery_simple_app_deps]
+
+
+def query_stackoverflow():
+    # [START bigquery_simple_app_client]
+    client = bigquery.Client()
+    # [END bigquery_simple_app_client]
+    # [START bigquery_simple_app_query]
+    query_job = client.query(
+        """
+        SELECT
+          CONCAT(
+            'https://stackoverflow.com/questions/',
+            CAST(id as STRING)) as url,
+          view_count
+        FROM `bigquery-public-data.stackoverflow.posts_questions`
+        WHERE tags like '%google-bigquery%'
+        ORDER BY view_count DESC
+        LIMIT 10"""
+    )
+
+    results = query_job.result()  # Waits for job to complete.
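+    # result() returns an iterable of rows; each row supports field access by
+    # attribute (row.url, row.view_count), as used in the print loop below.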
+ # [END bigquery_simple_app_query] + + # [START bigquery_simple_app_print] + for row in results: + print("{} : {} views".format(row.url, row.view_count)) + # [END bigquery_simple_app_print] + + +if __name__ == "__main__": + query_stackoverflow() +# [END bigquery_simple_app_all] diff --git a/samples/snippets/simple_app_test.py b/samples/snippets/simple_app_test.py new file mode 100644 index 000000000..5c608e1fd --- /dev/null +++ b/samples/snippets/simple_app_test.py @@ -0,0 +1,21 @@ +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import simple_app + + +def test_query_stackoverflow(capsys): + simple_app.query_stackoverflow() + out, _ = capsys.readouterr() + assert "views" in out diff --git a/samples/snippets/user_credentials.py b/samples/snippets/user_credentials.py new file mode 100644 index 000000000..6089d9fd9 --- /dev/null +++ b/samples/snippets/user_credentials.py @@ -0,0 +1,89 @@ +#!/usr/bin/env python + +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Command-line application to run a query using user credentials. + +You must supply a client secrets file, which would normally be bundled with +your application. +""" + +import argparse + + +def main(project, launch_browser=True): + # [START bigquery_auth_user_flow] + from google_auth_oauthlib import flow + + # TODO: Uncomment the line below to set the `launch_browser` variable. + # launch_browser = True + # + # The `launch_browser` boolean variable indicates if a local server is used + # as the callback URL in the auth flow. A value of `True` is recommended, + # but a local server does not work if accessing the application remotely, + # such as over SSH or from a remote Jupyter notebook. + + appflow = flow.InstalledAppFlow.from_client_secrets_file( + "client_secrets.json", scopes=["https://www.googleapis.com/auth/bigquery"] + ) + + if launch_browser: + appflow.run_local_server() + else: + appflow.run_console() + + credentials = appflow.credentials + # [END bigquery_auth_user_flow] + + # [START bigquery_auth_user_query] + from google.cloud import bigquery + + # TODO: Uncomment the line below to set the `project` variable. + # project = 'user-project-id' + # + # The `project` variable defines the project to be billed for query + # processing. The user must have the bigquery.jobs.create permission on + # this project to run a query. 
See: + # https://cloud.google.com/bigquery/docs/access-control#permissions + + client = bigquery.Client(project=project, credentials=credentials) + + query_string = """SELECT name, SUM(number) as total + FROM `bigquery-public-data.usa_names.usa_1910_current` + WHERE name = 'William' + GROUP BY name; + """ + query_job = client.query(query_string) + + # Print the results. + for row in query_job.result(): # Wait for the job to complete. + print("{}: {}".format(row["name"], row["total"])) + # [END bigquery_auth_user_query] + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) + parser.add_argument( + "--launch-browser", + help="Use a local server flow to authenticate. ", + action="store_true", + ) + parser.add_argument("project", help="Project to use for BigQuery billing.") + + args = parser.parse_args() + + main(args.project, launch_browser=args.launch_browser) diff --git a/samples/snippets/user_credentials_test.py b/samples/snippets/user_credentials_test.py new file mode 100644 index 000000000..829502d25 --- /dev/null +++ b/samples/snippets/user_credentials_test.py @@ -0,0 +1,41 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import google.auth +import mock +import pytest + +from user_credentials import main + + +PROJECT = os.environ["GOOGLE_CLOUD_PROJECT"] + + +@pytest.fixture +def mock_flow(): + flow_patch = mock.patch("google_auth_oauthlib.flow.InstalledAppFlow", autospec=True) + + with flow_patch as flow_mock: + flow_mock.from_client_secrets_file.return_value = flow_mock + flow_mock.credentials = google.auth.default()[0] + yield flow_mock + + +def test_auth_query_console(mock_flow, capsys): + main(PROJECT, launch_browser=False) + out, _ = capsys.readouterr() + # Fun fact: William P. Wood was the 1st director of the US Secret Service. + assert "William" in out diff --git a/synth.py b/synth.py index 2bc3798ea..f6009ab6c 100644 --- a/synth.py +++ b/synth.py @@ -16,6 +16,7 @@ import synthtool as s from synthtool import gcp +from synthtool.languages import python gapic = gcp.GAPICBazel() common = gcp.CommonTemplates() @@ -58,11 +59,17 @@ # ---------------------------------------------------------------------------- # Add templated files # ---------------------------------------------------------------------------- -templated_files = common.py_library(cov_level=100) +templated_files = common.py_library(cov_level=100, samples=True) # BigQuery has a custom multiprocessing note s.move(templated_files, excludes=["noxfile.py", "docs/multiprocessing.rst"]) +# ---------------------------------------------------------------------------- +# Samples templates +# ---------------------------------------------------------------------------- +python.py_samples() + + s.replace( "docs/conf.py", r'\{"members": True\}',