Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add programmatic descriptions parser for [AtlasProxy] #152

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions metadata_service/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ class Config:
# Minimum number of readers a table needs to qualify as a popular table
POPULAR_TABLE_MINIMUM_READER_COUNT = 10 # type: int

# List of regexes which will exclude certain parameters from appearing as Programmatic Descriptions
PROGRAMMATIC_DESCRIPTIONS_EXCLUDE_FILTERS = [] # type: list


class LocalConfig(Config):
DEBUG = True
Expand Down
28 changes: 26 additions & 2 deletions metadata_service/proxy/atlas_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from typing import Any, Dict, List, Union, Optional

from amundsen_common.models.popular_table import PopularTable
from amundsen_common.models.table import Column, Statistics, Table, Tag, User, Reader
from amundsen_common.models.table import Column, Statistics, Table, Tag, User, Reader, ProgrammaticDescription
from amundsen_common.models.user import User as UserEntity
from amundsen_common.models.dashboard import DashboardSummary
from atlasclient.client import Atlas
Expand Down Expand Up @@ -363,6 +363,8 @@ def get_table(self, *, table_uri: str) -> Table:
try:
attrs = table_details[self.ATTRS_KEY]

programmatic_descriptions = self._get_programmatic_descriptions(attrs.get('parameters'))

table_qn = parse_table_qualified_name(
qualified_name=attrs.get(self.QN_KEY)
)
Expand All @@ -389,7 +391,8 @@ def get_table(self, *, table_uri: str) -> Table:
owners=[User(email=attrs.get('owner'))],
columns=columns,
table_readers=self._get_readers(attrs.get(self.QN_KEY)),
last_updated_timestamp=self._parse_date(table_details.get('updateTime')))
last_updated_timestamp=self._parse_date(table_details.get('updateTime')),
programmatic_descriptions=programmatic_descriptions)

return table
except KeyError as ex:
Expand Down Expand Up @@ -736,6 +739,27 @@ def _get_readers(self, qualified_name: str, top: Optional[int] = 15) -> List[Rea

return results

def _get_programmatic_descriptions(self, parameters: dict) -> List[ProgrammaticDescription]:
programmatic_descriptions: Dict[str, ProgrammaticDescription] = {}

for source, text in parameters.items():
mgorsk1 marked this conversation as resolved.
Show resolved Hide resolved
use_parameter = True

for regex_filter in app.config['PROGRAMMATIC_DESCRIPTIONS_EXCLUDE_FILTERS']:
pattern = re.compile(regex_filter)

if pattern.match(source):
use_parameter = False
break

if use_parameter:
source = re.sub("([a-z])([A-Z])", "\g<1> \g<2>", source).lower()
programmatic_descriptions[source] = ProgrammaticDescription(source=source, text=text)

result = dict(sorted(programmatic_descriptions.items()))

return list(result.values())

def get_dashboard(self,
dashboard_uri: str,
) -> DashboardDetailEntity:
Expand Down
7 changes: 6 additions & 1 deletion tests/unit/proxy/fixtures/atlas_test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,12 @@ class Data:
'owner': '[email protected]',
'db': db_entity,
'popularityScore': 100,
'partitions': list()
'partitions': list(),
'parameters': {
'testParameterKeyB': 'testParameterValueB',
'testParameterKeyA': 'testParameterValueA',
'spark.sql.param': 1
}
},
'relationshipAttributes': {
'db': db_entity,
Expand Down
10 changes: 8 additions & 2 deletions tests/unit/proxy/test_atlas_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from typing import Any, Dict, Optional, cast, List

from amundsen_common.models.popular_table import PopularTable
from amundsen_common.models.table import Column, Statistics, Table, Tag, User, Reader
from amundsen_common.models.table import Column, Statistics, Table, Tag, User, Reader, ProgrammaticDescription
from atlasclient.exceptions import BadRequest
from mock import MagicMock, patch
from tests.unit.proxy.fixtures.atlas_test_data import Data, DottedDict
Expand All @@ -18,6 +18,7 @@
class TestAtlasProxy(unittest.TestCase, Data):
def setUp(self) -> None:
self.app = create_app(config_module_class='metadata_service.config.LocalConfig')
self.app.config['PROGRAMMATIC_DESCRIPTIONS_EXCLUDE_FILTERS'] = ['spark.*']
self.app_context = self.app.app_context()
self.app_context.push()

Expand Down Expand Up @@ -126,7 +127,12 @@ def _get_table(self, custom_stats_format: bool = False) -> None:
description=ent_attrs['description'],
owners=[User(email=ent_attrs['owner'])],
last_updated_timestamp=int(str(self.entity1['updateTime'])[:10]),
columns=[exp_col] * self.active_columns)
columns=[exp_col] * self.active_columns,
programmatic_descriptions=[ProgrammaticDescription(source='test parameter key a',
text='testParameterValueA'),
ProgrammaticDescription(source='test parameter key b',
text='testParameterValueB')
])

self.assertEqual(str(expected), str(response))

Expand Down