From e39123dbfec036ebcbccf744005d8301a89d1479 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 25 Jul 2019 16:43:22 +0200 Subject: [PATCH] Add StandardSqlDataTypes enum to BigQuery This is a convenience enum that contains scalar SQL data type constants (a subset of types defined in the gapic enum generated from the .proto definitions). --- bigquery/google/cloud/bigquery/__init__.py | 2 + bigquery/google/cloud/bigquery/enums.py | 84 +++++++++++++++++++++ bigquery/tests/unit/test_enums.py | 85 ++++++++++++++++++++++ 3 files changed, 171 insertions(+) create mode 100644 bigquery/google/cloud/bigquery/enums.py create mode 100644 bigquery/tests/unit/test_enums.py diff --git a/bigquery/google/cloud/bigquery/__init__.py b/bigquery/google/cloud/bigquery/__init__.py index b84051fc6be11..c41ceb6b03066 100644 --- a/bigquery/google/cloud/bigquery/__init__.py +++ b/bigquery/google/cloud/bigquery/__init__.py @@ -36,6 +36,7 @@ from google.cloud.bigquery.dataset import AccessEntry from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference +from google.cloud.bigquery.enums import StandardSqlDataTypes from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery.external_config import BigtableOptions from google.cloud.bigquery.external_config import BigtableColumnFamily @@ -130,6 +131,7 @@ "Encoding", "QueryPriority", "SchemaUpdateOption", + "StandardSqlDataTypes", "SourceFormat", "WriteDisposition", ] diff --git a/bigquery/google/cloud/bigquery/enums.py b/bigquery/google/cloud/bigquery/enums.py new file mode 100644 index 0000000000000..f35e3fda3340d --- /dev/null +++ b/bigquery/google/cloud/bigquery/enums.py @@ -0,0 +1,84 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
import re
import warnings

import enum
import six

from google.cloud.bigquery_v2.gapic import enums as gapic_enums


def _make_sql_scalars_enum():
    """Create an enum based on a gapic enum containing only SQL scalar types.

    The members are copied (same names, same values) from the gapic
    ``StandardSqlDataType.TypeKind`` enum, restricted to the scalar types
    listed below. The docstring of the new enum is the gapic docstring with
    the lines describing the excluded members filtered out.

    A ``UserWarning`` is emitted when the gapic enum contains a member that is
    neither a known scalar type nor an explicitly excluded member, since that
    means the lists in this function need to be revisited.

    Returns:
        enum.Enum: The newly created ``StandardSqlDataTypes`` enum.
    """
    sql_scalar_types = frozenset(
        (
            "INT64",
            "BOOL",
            "FLOAT64",
            "STRING",
            "BYTES",
            "TIMESTAMP",
            "DATE",
            "TIME",
            "DATETIME",
            "NUMERIC",
        )
    )
    excluded_members = frozenset(
        ("TYPE_KIND_UNSPECIFIED", "GEOGRAPHY", "ARRAY", "STRUCT")
    )

    # Look the gapic enum up once, at call time, so that a patched / reloaded
    # gapic module is honored consistently throughout this function.
    type_kind = gapic_enums.StandardSqlDataType.TypeKind

    # Sanity check - we do not want the new enum to go out of sync with the
    # original enum from gapic.
    # ASSUMPTION: No existing types are ever renamed or deleted, we only try to
    # detect cases when new types are introduced.
    gapic_names = {member.name for member in type_kind}
    unhandled_names = gapic_names - (sql_scalar_types | excluded_members)

    if unhandled_names:
        # Sort the names so that the message is deterministic (set iteration
        # order is not).
        msg = (
            "The StandardSqlDataTypes enum might be out of sync with the "
            "original StandardSqlDataType.TypeKind enum from gapic. Check "
            "enum members: {}".format(", ".join(sorted(unhandled_names)))
        )
        warnings.warn(msg, UserWarning)

    new_enum = enum.Enum(
        "StandardSqlDataTypes",
        [
            (member.name, member.value)
            for member in type_kind
            if member.name in sql_scalar_types
        ],
    )

    # Make sure the docstring for the new enum is also correct. The original
    # docstring is None when docstrings are stripped (``python -OO``), in which
    # case we fall back to an empty docstring instead of failing on import.
    orig_doc = type_kind.__doc__ or ""
    skip_pattern = re.compile(
        "|".join(re.escape(name) for name in sorted(excluded_members))
        + "|because a JSON object"  # the second description line of STRUCT member
    )

    new_enum.__doc__ = "\n".join(
        line for line in orig_doc.splitlines() if not skip_pattern.search(line)
    )

    return new_enum


StandardSqlDataTypes = _make_sql_scalars_enum()
import enum
import mock
import pytest
import six


@pytest.fixture
def enum_under_test():
    """The ``StandardSqlDataTypes`` enum exposed by ``google.cloud.bigquery.enums``."""
    from google.cloud.bigquery.enums import StandardSqlDataTypes

    return StandardSqlDataTypes


@pytest.fixture
def gapic_enum():
    """The referential autogenerated enum the enum under test is based on."""
    from google.cloud.bigquery_v2.gapic.enums import StandardSqlDataType

    return StandardSqlDataType.TypeKind


def test_standard_sql_types_enum_members(enum_under_test, gapic_enum):
    # check the presence of a few typical SQL types
    for name in ("INT64", "FLOAT64", "DATE", "BOOL"):
        assert name in enum_under_test.__members__

    # the enum members must match those in the original gapic enum
    for member in enum_under_test:
        assert member.name in gapic_enum.__members__
        assert member.value == gapic_enum[member.name].value

    # check a few members that should *not* be copied over from the gapic enum
    for name in ("GEOGRAPHY", "ARRAY"):
        assert name in gapic_enum.__members__
        assert name not in enum_under_test.__members__


def test_standard_sql_types_enum_docstring(enum_under_test, gapic_enum):
    assert "STRUCT (int):" not in enum_under_test.__doc__
    assert "BOOL (int):" in enum_under_test.__doc__
    assert "TIME (int):" in enum_under_test.__doc__

    # all lines in the docstring should actually come from the original docstring
    doc_lines = enum_under_test.__doc__.splitlines()
    assert set(doc_lines) <= set(gapic_enum.__doc__.splitlines())


def test_standard_sql_types_enum_warning_on_new_added_types(gapic_enum):
    class ReplacementEnum(enum.IntEnum):
        """Fake enum with some new database types."""

        INT64 = 2
        TIMESTAMP = 19
        TROOLEAN = 911  # One of {True, False, FileNotFound}. (OMG, help!)

    gapic_enum_patch = mock.patch(
        "google.cloud.bigquery_v2.gapic.enums.StandardSqlDataType.TypeKind",
        new=ReplacementEnum,
    )

    from google.cloud.bigquery import enums

    # The try block starts *before* the patched reload so that the module is
    # always regenerated from the real gapic enum, even if the reload itself
    # raises or pytest.warns() fails because no warning was emitted.
    try:
        with pytest.warns(UserWarning) as warn_record, gapic_enum_patch:
            six.moves.reload_module(enums)

        # Do not rely on the relevant warning being recorded first - any
        # unrelated UserWarning emitted during the reload could precede it.
        messages = [str(record.message) for record in warn_record]
        assert any(
            "StandardSqlDataTypes" in message and "out of sync" in message
            for message in messages
        )
    finally:
        six.moves.reload_module(enums)  # regenerate enum with original gapic enum