Skip to content

Commit

Permalink
fix(ingest): bigquery-beta - fix for missing key error if dataset was…
Browse files Browse the repository at this point in the history
… empty (#6133)
  • Loading branch information
treff7es authored Oct 6, 2022
1 parent 928f294 commit 6db0925
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 2 deletions.
3 changes: 3 additions & 0 deletions metadata-ingestion/docs/sources/bigquery/bigquery-beta_pre.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ If you have multiple projects in your BigQuery setup, the role should be granted
| `bigquery.readsessions.getData` | Get data from the read session. |
| `resourcemanager.projects.get` | Retrieve project names and metadata. |

Alternatively, you can use the following predefined IAM role, which has all the needed permissions:
- [roles/bigquery.metadataViewer](https://cloud.google.com/bigquery/docs/access-control#bigquery.metadataViewer)

##### Lineage/usage generation requirements

Additional requirements needed on top of the basic requirements.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -433,11 +433,17 @@ def add_table_to_dataset_container(
yield wu

def get_workunits(self) -> Iterable[WorkUnit]:

logger.info("Getting projects")
conn: bigquery.Client = self.get_bigquery_client()
self.add_config_to_report()

projects: List[BigqueryProject] = BigQueryDataDictionary.get_projects(conn)
if len(projects) == 0:
logger.warning(
"Get projects didn't return any project. Maybe resourcemanager.projects.get permission is missing for the service account. You can assign predefined roles/bigquery.metadataViewer role to your service account."
)
return

for project_id in projects:
if not self.config.project_id_pattern.allowed(project_id.id):
self.report.report_dropped(project_id.id)
Expand All @@ -446,6 +452,7 @@ def get_workunits(self) -> Iterable[WorkUnit]:
yield from self._process_project(conn, project_id)

if self.config.profiling.enabled:
logger.info("Starting profiling...")
yield from self.profiler.get_workunits(self.db_tables)

# Clean up stale entities if configured.
Expand Down Expand Up @@ -475,6 +482,12 @@ def _process_project(
)
return None

if len(bigquery_project.datasets) == 0:
logger.warning(
f"No dataset found in {project_id}. Either there are no datasets in this project or missing bigquery.datasets.get permission. You can assign predefined roles/bigquery.metadataViewer role to your service account."
)
return

for bigquery_dataset in bigquery_project.datasets:

if not self.config.dataset_pattern.allowed(bigquery_dataset.name):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -761,7 +761,8 @@ def _aggregate_enriched_read_events(
try:
resource = event.read_event.resource.get_sanitized_table_ref()
if (
resource.table_identifier.get_table_display_name()
resource.table_identifier.dataset not in tables
or resource.table_identifier.get_table_display_name()
not in tables[resource.table_identifier.dataset]
):
logger.debug(f"Skipping non existing {resource} from usage")
Expand Down

0 comments on commit 6db0925

Please sign in to comment.