From e2f9821e66507ee3ac3260d9c1b0ba899cf2efc4 Mon Sep 17 00:00:00 2001 From: Albert DeFusco Date: Thu, 17 Feb 2022 15:20:51 -0600 Subject: [PATCH] feat: Allow base64 encoded credentials in URI (#410) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #409 🦕 To enable credential information to be included in the connection URL, for cases where you don't have a credentials file locally on the client, I propose the `credentials_base64` parameter. It requires that the user encode their credentials JSON file using one of several techniques, such as `base64`, `openssl base64`, `python -m base64`, or www.base64encode.org. I have used nox to run unit and system tests for Python 3.6 - 3.9. I'm tracking down a separate issue on my machine that prevented the 3.10 tests from running. - [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery-sqlalchemy/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) --- README.rst | 28 +++++++++++++++++- sqlalchemy_bigquery/_helpers.py | 6 ++++ sqlalchemy_bigquery/base.py | 5 ++++ sqlalchemy_bigquery/parse_url.py | 8 +++++ tests/system/test_helpers.py | 29 ++++++++++++++++++ tests/unit/test_helpers.py | 50 +++++++++++++++++++++++++++++++- tests/unit/test_parse_url.py | 11 +++++-- 7 files changed, 133 insertions(+), 4 deletions(-) diff --git a/README.rst b/README.rst index 9b76eab6..c3586a4b 100644 --- a/README.rst +++ b/README.rst @@ -180,7 +180,7 @@ Connection String Parameters There are many situations where you can't call ``create_engine`` directly, such as when using tools like `Flask SQLAlchemy `_. 
For situations like these, or for situations where you want the ``Client`` to have a `default_query_job_config `_, you can pass many arguments in the query of the connection string. -The ``credentials_path``, ``credentials_info``, ``location``, ``arraysize`` and ``list_tables_page_size`` parameters are used by this library, and the rest are used to create a `QueryJobConfig `_ +The ``credentials_path``, ``credentials_info``, ``credentials_base64``, ``location``, ``arraysize`` and ``list_tables_page_size`` parameters are used by this library, and the rest are used to create a `QueryJobConfig `_ Note that if you want to use query strings, it will be more reliable if you use three slashes, so ``'bigquery:///?a=b'`` will work reliably, but ``'bigquery://?a=b'`` might be interpreted as having a "database" of ``?a=b``, depending on the system being used to parse the connection string. @@ -207,6 +207,32 @@ Here are examples of all the supported arguments. Any not present are either for 'write_disposition=WRITE_APPEND' ) +In cases where you wish to include the full credentials in the connection URI, you can base64-encode the credentials JSON file and supply the encoded string to the ``credentials_base64`` parameter. + +.. code-block:: python + + engine = create_engine( + 'bigquery://some-project/some-dataset' '?' 
+ 'credentials_base64=eyJrZXkiOiJ2YWx1ZSJ9Cg==' '&' + 'location=some-location' '&' + 'arraysize=1000' '&' + 'list_tables_page_size=100' '&' + 'clustering_fields=a,b,c' '&' + 'create_disposition=CREATE_IF_NEEDED' '&' + 'destination=different-project.different-dataset.table' '&' + 'destination_encryption_configuration=some-configuration' '&' + 'dry_run=true' '&' + 'labels=a:b,c:d' '&' + 'maximum_bytes_billed=1000' '&' + 'priority=INTERACTIVE' '&' + 'schema_update_options=ALLOW_FIELD_ADDITION,ALLOW_FIELD_RELAXATION' '&' + 'use_query_cache=true' '&' + 'write_disposition=WRITE_APPEND' + ) + +To create the base64-encoded string you can use the command line tool ``base64``, or ``openssl base64``, or ``python -m base64``. + +Alternatively, you can use an online generator like `www.base64encode.org <https://www.base64encode.org>`_ to paste your credentials JSON file to be encoded. Creating tables ^^^^^^^^^^^^^^^ diff --git a/sqlalchemy_bigquery/_helpers.py b/sqlalchemy_bigquery/_helpers.py index 95ca4b17..b03e232a 100644 --- a/sqlalchemy_bigquery/_helpers.py +++ b/sqlalchemy_bigquery/_helpers.py @@ -12,6 +12,8 @@ from google.cloud import bigquery from google.oauth2 import service_account import sqlalchemy +import base64 +import json USER_AGENT_TEMPLATE = "sqlalchemy/{}" @@ -30,12 +32,16 @@ def google_client_info(): def create_bigquery_client( credentials_info=None, credentials_path=None, + credentials_base64=None, default_query_job_config=None, location=None, project_id=None, ): default_project = None + if credentials_base64: + credentials_info = json.loads(base64.b64decode(credentials_base64)) + if credentials_path: credentials = service_account.Credentials.from_service_account_file( credentials_path diff --git a/sqlalchemy_bigquery/base.py b/sqlalchemy_bigquery/base.py index ae96d6f4..136758ab 100644 --- a/sqlalchemy_bigquery/base.py +++ b/sqlalchemy_bigquery/base.py @@ -753,6 +753,7 @@ def __init__( credentials_path=None, location=None, credentials_info=None, + credentials_base64=None, 
list_tables_page_size=1000, *args, **kwargs, @@ -761,6 +762,7 @@ def __init__( self.arraysize = arraysize self.credentials_path = credentials_path self.credentials_info = credentials_info + self.credentials_base64 = credentials_base64 self.location = location self.dataset_id = None self.list_tables_page_size = list_tables_page_size @@ -791,6 +793,7 @@ def create_connect_args(self, url): dataset_id, arraysize, credentials_path, + credentials_base64, default_query_job_config, list_tables_page_size, ) = parse_url(url) @@ -799,6 +802,7 @@ def create_connect_args(self, url): self.list_tables_page_size = list_tables_page_size or self.list_tables_page_size self.location = location or self.location self.credentials_path = credentials_path or self.credentials_path + self.credentials_base64 = credentials_base64 or self.credentials_base64 self.dataset_id = dataset_id self._add_default_dataset_to_job_config( default_query_job_config, project_id, dataset_id @@ -806,6 +810,7 @@ def create_connect_args(self, url): client = _helpers.create_bigquery_client( credentials_path=self.credentials_path, credentials_info=self.credentials_info, + credentials_base64=self.credentials_base64, project_id=project_id, location=self.location, default_query_job_config=default_query_job_config, diff --git a/sqlalchemy_bigquery/parse_url.py b/sqlalchemy_bigquery/parse_url.py index aeb1196e..b1d4b589 100644 --- a/sqlalchemy_bigquery/parse_url.py +++ b/sqlalchemy_bigquery/parse_url.py @@ -68,6 +68,7 @@ def parse_url(url): # noqa: C901 dataset_id = url.database or None arraysize = None credentials_path = None + credentials_base64 = None list_tables_page_size = None # location @@ -78,6 +79,10 @@ def parse_url(url): # noqa: C901 if "credentials_path" in query: credentials_path = query.pop("credentials_path") + # credentials_base64 + if "credentials_base64" in query: + credentials_base64 = query.pop("credentials_base64") + # arraysize if "arraysize" in query: str_arraysize = query.pop("arraysize") @@ 
-107,6 +112,7 @@ def parse_url(url): # noqa: C901 dataset_id, arraysize, credentials_path, + credentials_base64, QueryJobConfig(), list_tables_page_size, ) @@ -117,6 +123,7 @@ def parse_url(url): # noqa: C901 dataset_id, arraysize, credentials_path, + credentials_base64, None, list_tables_page_size, ) @@ -265,6 +272,7 @@ def parse_url(url): # noqa: C901 dataset_id, arraysize, credentials_path, + credentials_base64, job_config, list_tables_page_size, ) diff --git a/tests/system/test_helpers.py b/tests/system/test_helpers.py index 5d4e7c71..62f22688 100644 --- a/tests/system/test_helpers.py +++ b/tests/system/test_helpers.py @@ -4,6 +4,7 @@ # license that can be found in the LICENSE file or at # https://opensource.org/licenses/MIT. +import base64 import os import json @@ -30,6 +31,12 @@ def credentials_info(credentials_path): return json.load(credentials_file) +@pytest.fixture +def credentials_base64(credentials_path): + with open(credentials_path) as credentials_file: + return base64.b64encode(credentials_file.read().encode()).decode() + + def test_create_bigquery_client_with_credentials_path( module_under_test, credentials_path, credentials_info ): @@ -72,3 +79,25 @@ def test_create_bigquery_client_with_credentials_info_respects_project( credentials_info=credentials_info, project_id="connection-url-project", ) assert bqclient.project == "connection-url-project" + + +def test_create_bigquery_client_with_credentials_base64( + module_under_test, credentials_base64, credentials_info +): + bqclient = module_under_test.create_bigquery_client( + credentials_base64=credentials_base64 + ) + assert bqclient.project == credentials_info["project_id"] + + +def test_create_bigquery_client_with_credentials_base64_respects_project( + module_under_test, credentials_base64 +): + """Test that project_id is used, even when there is a default project. 
+ + https://github.com/googleapis/python-bigquery-sqlalchemy/issues/48 + """ + bqclient = module_under_test.create_bigquery_client( + credentials_base64=credentials_base64, project_id="connection-url-project", + ) + assert bqclient.project == "connection-url-project" diff --git a/tests/unit/test_helpers.py b/tests/unit/test_helpers.py index 53f92080..9400f1ed 100644 --- a/tests/unit/test_helpers.py +++ b/tests/unit/test_helpers.py @@ -4,12 +4,14 @@ # license that can be found in the LICENSE file or at # https://opensource.org/licenses/MIT. +import base64 +import json from unittest import mock import google.auth import google.auth.credentials -from google.oauth2 import service_account import pytest +from google.oauth2 import service_account class AnonymousCredentialsWithProject(google.auth.credentials.AnonymousCredentials): @@ -105,6 +107,52 @@ def test_create_bigquery_client_with_credentials_info_respects_project( assert bqclient.project == "connection-url-project" +def test_create_bigquery_client_with_credentials_base64(monkeypatch, module_under_test): + mock_service_account = mock.create_autospec(service_account.Credentials) + mock_service_account.from_service_account_info.return_value = AnonymousCredentialsWithProject( + "service-account-project" + ) + monkeypatch.setattr(service_account, "Credentials", mock_service_account) + + credentials_info = ( + {"type": "service_account", "project_id": "service-account-project"}, + ) + + credentials_base64 = base64.b64encode(json.dumps(credentials_info).encode()) + + bqclient = module_under_test.create_bigquery_client( + credentials_base64=credentials_base64 + ) + + assert bqclient.project == "service-account-project" + + +def test_create_bigquery_client_with_credentials_base64_respects_project( + monkeypatch, module_under_test +): + """Test that project_id is used, even when there is a default project. 
+ + https://github.com/googleapis/python-bigquery-sqlalchemy/issues/48 + """ + mock_service_account = mock.create_autospec(service_account.Credentials) + mock_service_account.from_service_account_info.return_value = AnonymousCredentialsWithProject( + "service-account-project" + ) + monkeypatch.setattr(service_account, "Credentials", mock_service_account) + + credentials_info = ( + {"type": "service_account", "project_id": "service-account-project"}, + ) + + credentials_base64 = base64.b64encode(json.dumps(credentials_info).encode()) + + bqclient = module_under_test.create_bigquery_client( + credentials_base64=credentials_base64, project_id="connection-url-project", + ) + + assert bqclient.project == "connection-url-project" + + def test_create_bigquery_client_with_default_credentials( monkeypatch, module_under_test ): diff --git a/tests/unit/test_parse_url.py b/tests/unit/test_parse_url.py index b66790c0..9f080933 100644 --- a/tests/unit/test_parse_url.py +++ b/tests/unit/test_parse_url.py @@ -48,6 +48,7 @@ def url_with_everything(): return make_url( "bigquery://some-project/some-dataset" "?credentials_path=/some/path/to.json" + "&credentials_base64=eyJrZXkiOiJ2YWx1ZSJ9Cg==" "&location=some-location" "&arraysize=1000" "&list_tables_page_size=5000" @@ -72,6 +73,7 @@ def test_basic(url_with_everything): dataset_id, arraysize, credentials_path, + credentials_base64, job_config, list_tables_page_size, ) = parse_url(url_with_everything) @@ -82,6 +84,7 @@ def test_basic(url_with_everything): assert arraysize == 1000 assert list_tables_page_size == 5000 assert credentials_path == "/some/path/to.json" + assert credentials_base64 == "eyJrZXkiOiJ2YWx1ZSJ9Cg==" assert isinstance(job_config, QueryJobConfig) @@ -123,7 +126,7 @@ def test_all_values(url_with_everything, param, value, default): ) for url in url_with_everything, url_with_this_one: - job_config = parse_url(url)[5] + job_config = parse_url(url)[6] config_value = getattr(job_config, param) if callable(value): assert 
value(config_value) @@ -131,7 +134,7 @@ def test_all_values(url_with_everything, param, value, default): assert config_value == value url_with_nothing = make_url("bigquery://some-project/some-dataset") - job_config = parse_url(url_with_nothing)[5] + job_config = parse_url(url_with_nothing)[6] assert getattr(job_config, param) == default @@ -177,6 +180,7 @@ def test_empty_with_non_config(): dataset_id, arraysize, credentials_path, + credentials_base64, job_config, list_tables_page_size, ) = url @@ -186,6 +190,7 @@ def test_empty_with_non_config(): assert dataset_id is None assert arraysize == 1000 assert credentials_path == "/some/path/to.json" + assert credentials_base64 is None assert job_config is None assert list_tables_page_size is None @@ -198,6 +203,7 @@ def test_only_dataset(): dataset_id, arraysize, credentials_path, + credentials_base64, job_config, list_tables_page_size, ) = url @@ -207,6 +213,7 @@ def test_only_dataset(): assert dataset_id == "some-dataset" assert arraysize is None assert credentials_path is None + assert credentials_base64 is None assert list_tables_page_size is None assert isinstance(job_config, QueryJobConfig) # we can't actually test that the dataset is on the job_config,