Skip to content

CLN: use pydata-google-auth for auth flow #241

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jan 4, 2019
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions docs/source/changelog.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,17 @@
Changelog
=========

.. _changelog-0.9.0:

0.9.0 / TBD
-----------

Internal changes
~~~~~~~~~~~~~~~~

- **New dependency** Use the ``pydata-google-auth`` package for
authentication. (:issue:`241`)

.. _changelog-0.8.0:

0.8.0 / 2018-11-12
Expand Down
3 changes: 1 addition & 2 deletions docs/source/howto/authentication.rst
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,7 @@ methods:
3. User account credentials.

pandas-gbq loads cached credentials from a hidden user folder on the
operating system. Override the location of the cached user credentials
by setting the ``PANDAS_GBQ_CREDENTIALS_FILE`` environment variable.
operating system.

If pandas-gbq does not find cached credentials, it opens a browser window
asking you to authenticate to your BigQuery account using the product
Expand Down
1 change: 1 addition & 0 deletions docs/source/install.rst
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ Dependencies

This module requires the following additional dependencies:

- `pydata-google-auth <https://github.com/pydata/pydata-google-auth>`__: Helpers for authentication to Google's API
- `google-auth <https://github.com/GoogleCloudPlatform/google-auth-library-python>`__: authentication and authorization for Google's API
- `google-auth-oauthlib <https://github.com/GoogleCloudPlatform/google-auth-library-python-oauthlib>`__: integration with `oauthlib <https://github.com/idan/oauthlib>`__ for end-user authentication
- `google-cloud-bigquery <http://github.com/GoogleCloudPlatform/google-cloud-python>`__: Google Cloud client library for BigQuery
Expand Down
267 changes: 21 additions & 246 deletions pandas_gbq/auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,40 +12,34 @@
logger = logging.getLogger(__name__)


CREDENTIALS_CACHE_DIRNAME = "pandas_gbq"
CREDENTIALS_CACHE_FILENAME = "bigquery_credentials.dat"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To confirm, you think we should have something specific for pandas_gbq rather than anything using pydata_google_auth?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was mostly to be backwards compatible. I guess people won't be too annoyed if we change the credentials location and they have to reauth?

Another concern is if they use the pydata_google_auth location, it may get confusing if they use that library with different scopes somewhere else.

SCOPES = ["https://www.googleapis.com/auth/bigquery"]


def get_credentials(
private_key=None,
project_id=None,
reauth=False,
auth_local_webserver=False,
try_credentials=None,
private_key=None, project_id=None, reauth=False, auth_local_webserver=False
):
if try_credentials is None:
try_credentials = _try_credentials
import pydata_google_auth

if private_key:
return get_service_account_credentials(private_key)

# Try to retrieve Application Default Credentials
credentials, default_project = get_application_default_credentials(
try_credentials, project_id=project_id
)

if credentials:
return credentials, default_project

credentials = get_user_account_credentials(
try_credentials,
project_id=project_id,
reauth=reauth,
credentials, default_project_id = pydata_google_auth.default(
SCOPES,
client_id="495642085510-k0tmvj2m941jhre2nbqka17vqpjfddtd.apps.googleusercontent.com",
client_secret="kOc9wMptUtxkcIFbtZCcrEAc",
credentials_cache=get_credentials_cache(reauth),
auth_local_webserver=auth_local_webserver,
)

project_id = project_id or default_project_id
return credentials, project_id


def get_service_account_credentials(private_key):
"""DEPRECATED: Load service account credentials from key data or key path."""

import google.auth.transport.requests
from google.oauth2.service_account import Credentials

Expand Down Expand Up @@ -87,233 +81,14 @@ def get_service_account_credentials(private_key):
)


def get_application_default_credentials(try_credentials, project_id=None):
"""
This method tries to retrieve the "default application credentials".
This could be useful for running code on Google Cloud Platform.

Parameters
----------
project_id (str, optional): Override the default project ID.

Returns
-------
- GoogleCredentials,
If the default application credentials can be retrieved
from the environment. The retrieved credentials should also
have access to the project (project_id) on BigQuery.
- OR None,
If default application credentials can not be retrieved
from the environment. Or, the retrieved credentials do not
have access to the project (project_id) on BigQuery.
"""
import google.auth
from google.auth.exceptions import DefaultCredentialsError

try:
credentials, default_project = google.auth.default(scopes=SCOPES)
except (DefaultCredentialsError, IOError):
return None, None

# Even though we now have credentials, check that the credentials can be
# used with BigQuery. For example, we could be running on a GCE instance
# that does not allow the BigQuery scopes.
billing_project = project_id or default_project
return try_credentials(billing_project, credentials), billing_project


def get_user_account_credentials(
try_credentials,
project_id=None,
reauth=False,
auth_local_webserver=False,
credentials_path=None,
):
"""Gets user account credentials.

This method authenticates using user credentials, either loading saved
credentials from a file or by going through the OAuth flow.
def get_credentials_cache(reauth,):
import pydata_google_auth.cache

Parameters
----------
None

Returns
-------
GoogleCredentials : credentials
Credentials for the user with BigQuery access.
"""
from google_auth_oauthlib.flow import InstalledAppFlow
from oauthlib.oauth2.rfc6749.errors import OAuth2Error

# Use the default credentials location under ~/.config and the
# equivalent directory on windows if the user has not specified a
# credentials path.
if not credentials_path:
credentials_path = get_default_credentials_path()

# Previously, pandas-gbq saved user account credentials in the
# current working directory. If the bigquery_credentials.dat file
# exists in the current working directory, move the credentials to
# the new default location.
if os.path.isfile("bigquery_credentials.dat"):
os.rename("bigquery_credentials.dat", credentials_path)

credentials = None
if not reauth:
credentials = load_user_account_credentials(
try_credentials,
project_id=project_id,
credentials_path=credentials_path,
)

client_config = {
"installed": {
"client_id": (
"495642085510-k0tmvj2m941jhre2nbqka17vqpjfddtd"
".apps.googleusercontent.com"
),
"client_secret": "kOc9wMptUtxkcIFbtZCcrEAc",
"redirect_uris": ["urn:ietf:wg:oauth:2.0:oob"],
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
"token_uri": "https://accounts.google.com/o/oauth2/token",
}
}

if credentials is None:
app_flow = InstalledAppFlow.from_client_config(
client_config, scopes=SCOPES
if reauth:
return pydata_google_auth.cache.WriteOnlyCredentialsCache(
dirname=CREDENTIALS_CACHE_DIRNAME,
filename=CREDENTIALS_CACHE_FILENAME,
)

try:
if auth_local_webserver:
credentials = app_flow.run_local_server()
else:
credentials = app_flow.run_console()
except OAuth2Error as ex:
raise pandas_gbq.exceptions.AccessDenied(
"Unable to get valid credentials: {0}".format(ex)
)

save_user_account_credentials(credentials, credentials_path)

return credentials


def load_user_account_credentials(
try_credentials, project_id=None, credentials_path=None
):
"""
Loads user account credentials from a local file.

.. versionadded 0.2.0

Parameters
----------
None

Returns
-------
- GoogleCredentials,
If the credentials can loaded. The retrieved credentials should
also have access to the project (project_id) on BigQuery.
- OR None,
If credentials can not be loaded from a file. Or, the retrieved
credentials do not have access to the project (project_id)
on BigQuery.
"""
import google.auth.transport.requests
from google.oauth2.credentials import Credentials

try:
with open(credentials_path) as credentials_file:
credentials_json = json.load(credentials_file)
except (IOError, ValueError):
return None

credentials = Credentials(
token=credentials_json.get("access_token"),
refresh_token=credentials_json.get("refresh_token"),
id_token=credentials_json.get("id_token"),
token_uri=credentials_json.get("token_uri"),
client_id=credentials_json.get("client_id"),
client_secret=credentials_json.get("client_secret"),
scopes=credentials_json.get("scopes"),
return pydata_google_auth.cache.ReadWriteCredentialsCache(
dirname=CREDENTIALS_CACHE_DIRNAME, filename=CREDENTIALS_CACHE_FILENAME
)

# Refresh the token before trying to use it.
request = google.auth.transport.requests.Request()
credentials.refresh(request)

return try_credentials(project_id, credentials)


def get_default_credentials_path():
"""
Gets the default path to the BigQuery credentials

.. versionadded 0.3.0

Returns
-------
Path to the BigQuery credentials
"""
if os.name == "nt":
config_path = os.environ["APPDATA"]
else:
config_path = os.path.join(os.path.expanduser("~"), ".config")

config_path = os.path.join(config_path, "pandas_gbq")

# Create a pandas_gbq directory in an application-specific hidden
# user folder on the operating system.
if not os.path.exists(config_path):
os.makedirs(config_path)

return os.path.join(config_path, "bigquery_credentials.dat")


def save_user_account_credentials(credentials, credentials_path):
"""
Saves user account credentials to a local file.

.. versionadded 0.2.0
"""
try:
with open(credentials_path, "w") as credentials_file:
credentials_json = {
"refresh_token": credentials.refresh_token,
"id_token": credentials.id_token,
"token_uri": credentials.token_uri,
"client_id": credentials.client_id,
"client_secret": credentials.client_secret,
"scopes": credentials.scopes,
}
json.dump(credentials_json, credentials_file)
except IOError:
logger.warning("Unable to save credentials.")


def _try_credentials(project_id, credentials):
from google.cloud import bigquery
import google.api_core.exceptions
import google.auth.exceptions

if not credentials:
return None
if not project_id:
return credentials

try:
client = bigquery.Client(project=project_id, credentials=credentials)
# Check if the application has rights to the BigQuery project
client.query("SELECT 1").result()
return credentials
except google.api_core.exceptions.GoogleAPIError:
return None
except google.auth.exceptions.RefreshError:
# Sometimes (such as on Travis) google-auth returns GCE credentials,
# but fetching the token for those credentials doesn't actually work.
# See:
# https://github.com/googleapis/google-auth-library-python/issues/287
return None
17 changes: 7 additions & 10 deletions pandas_gbq/gbq.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,13 @@ def _check_google_client_version():

def _test_google_api_imports():

try:
import pydata_google_auth
except ImportError as ex:
raise ImportError(
"pandas-gbq requires pydata-google-auth: {0}".format(ex)
)

try:
from google_auth_oauthlib.flow import InstalledAppFlow # noqa
except ImportError as ex:
Expand Down Expand Up @@ -297,7 +304,6 @@ def __init__(
auth_local_webserver=False,
dialect="legacy",
location=None,
try_credentials=None,
credentials=None,
):
global context
Expand All @@ -313,7 +319,6 @@ def __init__(
self.auth_local_webserver = auth_local_webserver
self.dialect = dialect
self.credentials = credentials
self.credentials_path = _get_credentials_file()
default_project = None

# Load credentials from cache.
Expand All @@ -328,7 +333,6 @@ def __init__(
project_id=project_id,
reauth=reauth,
auth_local_webserver=auth_local_webserver,
try_credentials=try_credentials,
)

if self.project_id is None:
Expand Down Expand Up @@ -635,10 +639,6 @@ def delete_and_recreate_table(self, dataset_id, table_id, table_schema):
table.create(table_id, table_schema)


def _get_credentials_file():
return os.environ.get("PANDAS_GBQ_CREDENTIALS_FILE")


def _parse_schema(schema_fields):
# see:
# http://pandas.pydata.org/pandas-docs/dev/missing_data.html
Expand Down Expand Up @@ -1003,9 +1003,6 @@ def to_gbq(
reauth=reauth,
auth_local_webserver=auth_local_webserver,
location=location,
# Avoid reads when writing tables.
# https://github.com/pydata/pandas-gbq/issues/202
try_credentials=lambda project, creds: creds,
credentials=credentials,
private_key=private_key,
)
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ def readme():
INSTALL_REQUIRES = [
"setuptools",
"pandas",
"pydata-google-auth",
"google-auth",
"google-auth-oauthlib",
"google-cloud-bigquery>=0.32.0",
Expand Down
Loading