Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Return bq job id from bigquery.run_job() #2957

Merged
merged 6 commits into from
Jun 8, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 18 additions & 2 deletions luigi/contrib/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,9 @@ def run_job(self, project_id, body, dataset=None):

:param dataset:
:type dataset: BQDataset
:return: the job id of the job.
:rtype: str
:raises luigi.contrib.bigquery.BigQueryExecutionError: if the job fails.
"""

if dataset and not self.dataset_exists(dataset):
Expand All @@ -348,8 +351,8 @@ def run_job(self, project_id, body, dataset=None):
status = self.client.jobs().get(projectId=project_id, jobId=job_id).execute(num_retries=10)
if status['status']['state'] == 'DONE':
if status['status'].get('errorResult'):
raise Exception('BigQuery job failed: {}'.format(status['status']['errorResult']))
return
raise BigQueryExecutionError(job_id, status['status']['errorResult'])
return job_id
tomasaschan marked this conversation as resolved.
Show resolved Hide resolved

logger.info('Waiting for job %s:%s to complete...', project_id, job_id)
time.sleep(5)
Expand Down Expand Up @@ -786,3 +789,16 @@ def run(self):
# Aliases that bind the "Bigquery"-cased names to the "BigQuery"-cased classes.
# NOTE(review): presumably kept so code using the older spelling keeps working
# -- confirm against callers before removing.
BigqueryRunQueryTask = BigQueryRunQueryTask
BigqueryCreateViewTask = BigQueryCreateViewTask
ExternalBigqueryTask = ExternalBigQueryTask


class BigQueryExecutionError(Exception):
    """Error raised for a BigQuery job that completed with an error result.

    The job id and the reported error are stored as attributes so that callers
    can inspect them without parsing the exception message.
    """

    def __init__(self, job_id, error_message) -> None:
        """
        :param job_id: BigQuery Job ID
        :type job_id: str
        :param error_message: status['status']['errorResult'] for the failed job;
            stored unchanged and interpolated into the exception message
        :type error_message: str
            (NOTE(review): errorResult may be a structured object rather than a
            plain string in API responses -- confirm)
        """
        super().__init__('BigQuery job {} failed: {}'.format(job_id, error_message))
        self.error_message = error_message
        self.job_id = job_id
14 changes: 14 additions & 0 deletions test/contrib/bigquery_gcloud_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
from avro.datafile import DataFileWriter
from avro.io import DatumWriter
from luigi.contrib.gcs import GCSTarget
from luigi.contrib.bigquery import BigQueryExecutionError

from nose.plugins.attrib import attr
from helpers import unittest
Expand Down Expand Up @@ -322,6 +323,19 @@ def test_run_query(self):

self.assertTrue(self.bq_client.table_exists(self.table))

def test_run_successful_job(self):
    """A valid query job should yield a non-empty job id from run_job()."""
    query = 'select count(*) from unnest([1,2,3])'
    job_body = {'configuration': {'query': {'query': query}}}

    returned_id = self.bq_client.run_job(PROJECT_ID, job_body)

    # The id must be present and must not be the empty string.
    self.assertIsNotNone(returned_id)
    self.assertNotEqual('', returned_id)

def test_run_failing_job(self):
    """An invalid query should make run_job() raise BigQueryExecutionError."""
    job_body = {'configuration': {'query': {'query': 'this is not a valid query'}}}

    with self.assertRaises(BigQueryExecutionError):
        self.bq_client.run_job(PROJECT_ID, job_body)


@attr('gcloud')
class BigQueryLoadAvroTest(unittest.TestCase):
Expand Down