feat(airflow): support kafka sinks in the airflow plugin (#6508)
hsheth2 authored Nov 22, 2022
1 parent 3fdaf13 commit 1229c1f
Showing 1 changed file with 24 additions and 8 deletions.
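
The change threads a delivery callback through the lineage emit calls so that an asynchronous sink, in particular the Kafka emitter, can report success or failure to the task log instead of the plugin logging "Emitted ..." unconditionally right after a fire-and-forget emit. As a rough sketch of how a Kafka sink would be selected for the plugin (the connection id, broker address, and the "datahub_kafka" connection type below are assumptions for illustration, not something this diff introduces):

# Hypothetical setup sketch, assuming the datahub_provider package registers a
# "datahub_kafka" Airflow connection type; ids and hosts are illustrative only.
from airflow.models import Connection
from airflow.settings import Session

kafka_conn = Connection(
    conn_id="datahub_kafka_default",   # assumed default conn_id for a Kafka sink
    conn_type="datahub_kafka",         # Kafka sink, as opposed to "datahub_rest"
    host="broker:9092",                # Kafka bootstrap servers
)

session = Session()
session.add(kafka_conn)                # same effect as `airflow connections add ...`
session.commit()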
metadata-ingestion/src/datahub_provider/_plugin.py (24 additions, 8 deletions)
@@ -1,8 +1,9 @@
 from datahub_provider._airflow_compat import Operator
 
+import contextlib
 import logging
 import traceback
-from typing import Any, Iterable, List
+from typing import Any, Callable, Iterable, List, Optional
 
 from airflow.configuration import conf
 from airflow.lineage import PIPELINE_OUTLETS
@@ -105,6 +106,19 @@ def get_inlets_from_task(task: BaseOperator, context: Any) -> Iterable[Any]:
     return inlets
 
 
+def _make_emit_callback(
+    logger: logging.Logger,
+    goal: str,
+) -> Callable[[Optional[Exception], str], None]:
+    def kafka_callback(err: Optional[Exception], msg: str) -> None:
+        if not err:
+            logger.info(f"Sent metadata to datahub {goal}")
+        else:
+            logger.error(f"Failed to send {goal} to datahub: {msg}", exc_info=err)
+
+    return kafka_callback
+
+
 def datahub_task_status_callback(context, status):
     ti = context["ti"]
     task: "BaseOperator" = ti.task
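
The new _make_emit_callback helper is a small factory: it closes over the task logger and a human-readable description of what is being emitted, and returns an (err, msg) callback that an asynchronous emitter such as the Kafka emitter can invoke once delivery actually succeeds or fails. A minimal sketch exercising the callback shown above (the logger name and messages are illustrative):

# Minimal usage sketch of the factory above; logger name and messages are illustrative.
import logging

logger = logging.getLogger("airflow.task")
callback = _make_emit_callback(logger, "Datahub DataFlow example")

# Success path: no error reported by the emitter.
callback(None, "delivered")
# -> INFO: Sent metadata to datahub Datahub DataFlow example

# Failure path: the emitter passes the exception and a diagnostic message.
callback(RuntimeError("broker unreachable"), "delivery failed")
# -> ERROR: Failed to send Datahub DataFlow example to datahub: delivery failed (with traceback)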
@@ -126,9 +140,9 @@ def datahub_task_status_callback(context, status):
         capture_tags=context["_datahub_config"].capture_tags_info,
         capture_owner=context["_datahub_config"].capture_ownership_info,
     )
-    dataflow.emit(emitter)
-
-    task.log.info(f"Emitted Datahub DataFlow: {dataflow}")
+    dataflow.emit(
+        emitter, callback=_make_emit_callback(task.log, f"Datahub DataFlow {dataflow}")
+    )
 
     datajob = AirflowGenerator.generate_datajob(
         cluster=context["_datahub_config"].cluster,
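
Passing the callback into dataflow.emit (and datajob.emit below) replaces the old unconditional task.log.info call: with a REST sink the emit completes synchronously, but with a Kafka sink the record is only handed to the producer, so success or failure can only be reported later from the delivery callback. A rough sketch of the kind of dispatch an emit-with-optional-callback entry point might perform; the emitter method names emit_mcp_async and emit_mcp are assumptions for illustration, not taken from this diff:

# Hypothetical sketch of threading an optional callback through an emit() call.
# The emitter method names below are assumptions, not the actual datahub API.
from typing import Any, Callable, Iterable, Optional

EmitCallback = Callable[[Optional[Exception], str], None]

def emit_all(emitter: Any, items: Iterable[Any], callback: Optional[EmitCallback] = None) -> None:
    for item in items:
        if hasattr(emitter, "emit_mcp_async"):
            # Kafka-style emitter: enqueue and let the delivery report fire the callback.
            emitter.emit_mcp_async(item, callback=callback)
        else:
            # REST-style emitter: synchronous, so report the outcome immediately.
            try:
                emitter.emit_mcp(item)
            except Exception as e:
                if callback:
                    callback(e, str(item))
            else:
                if callback:
                    callback(None, str(item))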
@@ -145,8 +159,9 @@ def datahub_task_status_callback(context, status):
     for outlet in task_outlets:
         datajob.outlets.append(outlet.urn)
 
-    task.log.info(f"Emitted Datahub dataJob: {datajob}")
-    datajob.emit(emitter)
+    datajob.emit(
+        emitter, callback=_make_emit_callback(task.log, f"Datahub dataJob {datajob}")
+    )
 
     if context["_datahub_config"].capture_executions:
         dpi = AirflowGenerator.run_datajob(
@@ -207,8 +222,9 @@ def datahub_pre_execution(context):
     for outlet in task_outlets:
         datajob.outlets.append(outlet.urn)
 
-    datajob.emit(emitter)
-    task.log.info(f"Emitting Datahub DataJob: {datajob}")
+    datajob.emit(
+        emitter, callback=_make_emit_callback(task.log, f"Datahub dataJob {datajob}")
+    )
 
     if context["_datahub_config"].capture_executions:
         dpi = AirflowGenerator.run_datajob(
