Skip to content

Commit

Permalink
Fixing table sanitization
Browse files Browse the repository at this point in the history
  • Loading branch information
treff7es committed Sep 27, 2022
1 parent d8216d9 commit 7f334ce
Show file tree
Hide file tree
Showing 4 changed files with 5 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ class BigqueryTableIdentifier:
table: str

invalid_chars: ClassVar[Set[str]] = {"$", "@"}
_BIGQUERY_DEFAULT_SHARDED_TABLE_REGEX: ClassVar[str] = "((.+)[_$])?(\\d{4,10})$"
_BIGQUERY_DEFAULT_SHARDED_TABLE_REGEX: ClassVar[str] = "((.+)[_$])?(\\d{8})$"

@staticmethod
def get_table_and_shard(table_name: str) -> Tuple[str, Optional[str]]:
Expand All @@ -101,17 +101,10 @@ def from_string_name(cls, table: str) -> "BigqueryTableIdentifier":
def raw_table_name(self):
return f"{self.project_id}.{self.dataset}.{self.table}"

@staticmethod
def _remove_suffix(input_string: str, suffixes: List[str]) -> str:
for suffix in suffixes:
if input_string.endswith(suffix):
return input_string[: -len(suffix)]
return input_string

def get_table_display_name(self) -> str:
shortened_table_name = self.table
# if table name ends in _* or * then we strip it as that represents a query on a sharded table
shortened_table_name = self._remove_suffix(shortened_table_name, ["_*", "*"])
shortened_table_name = re.sub("(_(.+)?\\*)|\\*$", "", shortened_table_name)

table_name, _ = self.get_table_and_shard(shortened_table_name)
if not table_name:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -268,19 +268,12 @@ def is_temporary_table(self, prefix: str) -> bool:
# Temporary tables will have a dataset that begins with an underscore.
return self.dataset.startswith(prefix)

@staticmethod
def remove_suffix(input_string, suffix):
if suffix and input_string.endswith(suffix):
return input_string[: -len(suffix)]
return input_string

def remove_extras(self, sharded_table_regex: str) -> "BigQueryTableRef":
# Handle partitioned and sharded tables.
table_name: Optional[str] = None
shortened_table_name = self.table
# if table name ends in _* or * then we strip it as that represents a query on a sharded table
shortened_table_name = self.remove_suffix(shortened_table_name, "_*")
shortened_table_name = self.remove_suffix(shortened_table_name, "*")
shortened_table_name = re.sub("(_(.+)?\\*)|\\*$", "", shortened_table_name)

matches = re.match(sharded_table_regex, shortened_table_name)
if matches:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ def test_bigquery_ref_extra_removal():

table_ref = BigQueryTableRef("project-1234", "dataset-4567", "foo_2022")
new_table_ref = table_ref.remove_extras(_BIGQUERY_DEFAULT_SHARDED_TABLE_REGEX)
assert new_table_ref.table == "foo"
assert new_table_ref.table == "foo_2022"
assert new_table_ref.project == table_ref.project
assert new_table_ref.dataset == table_ref.dataset

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ def test_bigquery_table_sanitasitation():
new_table_ref = BigqueryTableIdentifier.from_string_name(
table_ref.table_identifier.get_table_name()
)
assert new_table_ref.table == "foo"
assert new_table_ref.table == "foo_2022"
assert new_table_ref.project_id == "project-1234"
assert new_table_ref.dataset == "dataset-4567"

Expand Down

0 comments on commit 7f334ce

Please sign in to comment.