Skip to content

Commit

Permalink
fix(bigquery): multi-project GCP setup run query through correct project (#5393)
Browse files Browse the repository at this point in the history
  • Loading branch information
anshbansal authored Jul 14, 2022
1 parent 7bbac5e commit 60714df
Showing 1 changed file with 10 additions and 1 deletion.
11 changes: 10 additions & 1 deletion metadata-ingestion/src/datahub/ingestion/source/sql/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,10 @@ def __init__(self, config, ctx):
def get_multiproject_project_id(
self, inspector: Optional[Inspector] = None, run_on_compute: bool = False
) -> Optional[str]:
"""
Use run_on_compute = True when running queries against a storage project
where you don't have job create rights
"""
if self.config.storage_project_id and (not run_on_compute):
return self.config.storage_project_id
elif self.config.project_id:
Expand All @@ -353,6 +357,11 @@ def get_multiproject_project_id(
return None

def get_db_name(self, inspector: Inspector) -> str:
"""
DO NOT USE this to get project name when running queries.
That can cause problems with multi-project setups.
Use get_multiproject_project_id with run_on_compute = True
"""
db_name = self.get_multiproject_project_id(inspector)
# db name can't be empty here as we pass in inspector to get_multiproject_project_id
assert db_name
Expand Down Expand Up @@ -458,7 +467,7 @@ def generate_profile_candidates(
profile_clause = c if c == "" else f" WHERE {c}"[:-4]
if profile_clause == "":
return None
project_id = self.get_db_name(inspector)
project_id = self.get_multiproject_project_id(inspector, run_on_compute=True)
_client: BigQueryClient = BigQueryClient(project=project_id)
# Reading all tables' metadata to report
base_query = (
Expand Down

0 comments on commit 60714df

Please sign in to comment.