refactor(ingest): clean up exception types #6818

Merged (3 commits) on Dec 21, 2022

Changes from 2 commits
2 changes: 1 addition & 1 deletion metadata-ingestion/docs/sources/glue/glue.md
@@ -1,3 +1,3 @@
## Compatibility
### Compatibility

To capture lineage across Glue jobs and databases, a requirement must be met – otherwise the AWS API is unable to report any lineage. The job must be created in Glue Studio with the "Generate classic script" option turned on (this option can be accessed in the "Script" tab). Any custom scripts that do not have the proper annotations will not have their lineage reported.
16 changes: 10 additions & 6 deletions metadata-ingestion/src/datahub/configuration/common.py
@@ -128,15 +128,19 @@ class DynamicTypedConfig(ConfigModel):


class MetaError(Exception):
"""A base class for all meta exceptions"""
"""A base class for all meta exceptions."""


class PipelineExecutionError(MetaError):
"""An error occurred when executing the pipeline"""
"""An error occurred when executing the pipeline."""


class OperationalError(PipelineExecutionError):
"""An error occurred because of client-provided metadata"""
class GraphError(MetaError):
"""An error in communicating with the DataHub Graph."""


class OperationalError(GraphError):
"""A GraphError with extra debug annotations."""

message: str
info: dict
@@ -147,11 +151,11 @@ def __init__(self, message: str, info: Optional[dict] = None):


class ConfigurationError(MetaError):
"""A configuration error has happened"""
"""A configuration error."""


class IgnorableError(MetaError):
"""An error that can be ignored"""
"""An error that can be ignored."""


class ConfigurationMechanism(ABC):
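The net effect of this hunk is a three-level hierarchy: MetaError at the top, GraphError for failures when talking to the DataHub Graph, and OperationalError as a GraphError that carries extra debug annotations. A minimal sketch of how that relationship plays out at a catch site, with the classes paraphrased from the diff above (the handler code itself is illustrative, not part of the PR):

```python
from typing import Optional


class MetaError(Exception):
    """A base class for all meta exceptions."""


class GraphError(MetaError):
    """An error in communicating with the DataHub Graph."""


class OperationalError(GraphError):
    """A GraphError with extra debug annotations."""

    def __init__(self, message: str, info: Optional[dict] = None):
        super().__init__(message)
        self.message = message
        self.info = info or {}


# Because OperationalError now subclasses GraphError (previously it subclassed
# PipelineExecutionError), handlers written against GraphError also catch the
# richer OperationalError.
try:
    raise OperationalError("server rejected the aspect", info={"status": 500})
except GraphError as e:
    print(f"caught {type(e).__name__}: {e}")
```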
6 changes: 3 additions & 3 deletions metadata-ingestion/src/datahub/ingestion/graph/client.py
@@ -10,7 +10,7 @@
from requests.models import HTTPError

from datahub.cli.cli_utils import get_boolean_env_variable
from datahub.configuration.common import ConfigModel, OperationalError
from datahub.configuration.common import ConfigModel, GraphError, OperationalError
from datahub.emitter.mce_builder import Aspect
from datahub.emitter.rest_emitter import DatahubRestEmitter
from datahub.emitter.serialization_helper import post_json_transform
@@ -157,7 +157,7 @@ def get_aspect(
post_json_obj = post_json_transform(aspect_json)
return aspect_type.from_obj(post_json_obj)
else:
raise OperationalError(
raise GraphError(
f"Failed to find {aspect_type_name} in response {response_json}"
)

@@ -297,7 +297,7 @@ def get_latest_timeseries_value(
if aspect_json:
return aspect_type.from_obj(json.loads(aspect_json), tuples=False)
else:
raise OperationalError(
raise GraphError(
f"Failed to find {aspect_type} in response {aspect_json}"
)
return None
@@ -225,7 +225,7 @@ class GlueSource(StatefulIngestionSourceBase):
- Table metadata, such as owner, description and parameters
- Jobs and their component transformations, data sources, and data sinks

## IAM permissions
### IAM permissions

For ingesting datasets, the following IAM permissions are required:
```json
@@ -915,7 +915,9 @@ def _extract_metadata_from_sql_query(
sql_query = derived_table["sql"]
reporter.query_parse_attempts += 1

# Skip queries that contain liquid variables. We currently don't parse them correctly
# Skip queries that contain liquid variables. We currently don't parse them correctly.
# Docs: https://cloud.google.com/looker/docs/liquid-variable-reference.
# TODO: also support ${EXTENDS} and ${TABLE}
if "{%" in sql_query:
try:
# test if parsing works
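For context on the comment above: liquid variables embed templating tags such as `{% ... %}` directly in the SQL, which the lineage SQL parser cannot handle, so such queries get special treatment. A small illustration of the guard, using a made-up query string rather than anything from the PR:

```python
# Illustrative only: a Looker-style derived-table SQL body that uses liquid
# templating. The query text is invented for this example.
sql_query = """
    SELECT order_id, created_at
    FROM orders
    WHERE {% condition order_region %} region {% endcondition %}
"""

# Mirrors the guard in the diff above: liquid tags are not valid SQL, so the
# source skips (or only best-effort parses) these queries for lineage.
if "{%" in sql_query:
    print("query contains liquid templating; skipping lineage SQL parse")
```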
@@ -1,14 +1,12 @@
import json
import re
import time
import warnings
from typing import Any, Dict, Generator, List, Optional, Tuple

import requests
import yaml
from requests.auth import HTTPBasicAuth

from datahub.metadata.com.linkedin.pegasus2avro.common import AuditStamp
from datahub.metadata.com.linkedin.pegasus2avro.schema import (
OtherSchemaClass,
SchemaField,
@@ -385,16 +383,12 @@ def set_metadata(
)
canonical_schema.append(field)

actor = "urn:li:corpuser:etl"
sys_time = int(time.time() * 1000)
schema_metadata = SchemaMetadata(
schemaName=dataset_name,
platform=f"urn:li:dataPlatform:{platform}",
Collaborator: nice
version=0,
hash="",
platformSchema=OtherSchemaClass(rawSchema=""),
created=AuditStamp(time=sys_time, actor=actor),
lastModified=AuditStamp(time=sys_time, actor=actor),
fields=canonical_schema,
)
return schema_metadata
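With the AuditStamp arguments dropped, the OpenAPI source now leaves created/lastModified at their defaults instead of stamping a fixed etl actor and the current time. A minimal sketch of the resulting construction, assuming only the keyword arguments visible in the diff; the helper name, its parameters, and the SchemaMetadata import location (taken to match OtherSchemaClass) are illustrative:

```python
from typing import List

from datahub.metadata.com.linkedin.pegasus2avro.schema import (
    OtherSchemaClass,
    SchemaField,
    SchemaMetadata,
)


def build_schema_metadata(
    dataset_name: str, platform: str, canonical_schema: List[SchemaField]
) -> SchemaMetadata:
    # Sketch of the simplified construction: no explicit actor or sys_time, and
    # no created/lastModified AuditStamps -- those fields fall back to defaults.
    return SchemaMetadata(
        schemaName=dataset_name,
        platform=f"urn:li:dataPlatform:{platform}",
        version=0,
        hash="",
        platformSchema=OtherSchemaClass(rawSchema=""),
        fields=canonical_schema,
    )
```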
5 changes: 1 addition & 4 deletions metadata-ingestion/tests/integration/lookml/test_lookml.py
@@ -325,11 +325,8 @@ def test_lookml_bad_sql_parser(pytestconfig, tmp_path, mock_time):
pipeline.run()
pipeline.pretty_print_summary()
pipeline.raise_from_status(raise_warnings=False)
try:
with pytest.raises(PipelineExecutionError): # we expect the source to have warnings
pipeline.raise_from_status(raise_warnings=True)
assert False, "Pipeline should have generated warnings"
except PipelineExecutionError:
pass

mce_helpers.check_golden_file(
pytestconfig,
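The rewritten test relies on pytest.raises, which fails the test automatically if the expected exception is never raised inside the with-block, replacing the old try/except plus `assert False` pattern. A self-contained sketch of the idiom with stand-in names (not the real pipeline objects):

```python
import pytest


class PipelineExecutionError(Exception):
    """Stand-in for datahub's PipelineExecutionError."""


def raise_from_status(raise_warnings: bool = False) -> None:
    # Toy stand-in: the real Pipeline.raise_from_status re-raises accumulated
    # source warnings/failures as a PipelineExecutionError.
    if raise_warnings:
        raise PipelineExecutionError("source generated warnings")


def test_warnings_are_surfaced() -> None:
    # pytest.raises fails the test if the block completes without raising,
    # so no explicit `assert False` is needed.
    with pytest.raises(PipelineExecutionError):
        raise_from_status(raise_warnings=True)
```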