Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add default cypher query for user/dashboard entity to search extractor #220

Merged
merged 1 commit into from
Mar 11, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 55 additions & 5 deletions databuilder/extractor/neo4j_search_data_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@ class Neo4jSearchDataExtractor(Extractor):
Use Neo4jExtractor extractor class
"""
# Config key for a caller-supplied cypher query. When this key is present in the
# config, init() uses its value instead of any default query.
CYPHER_QUERY_CONFIG_KEY = 'cypher_query'
# Config key naming the entity type whose default query should be used. init()
# lower-cases the value and falls back to 'table' when the key is absent.
ENTITY_TYPE = 'entity_type'

DEFAULT_NEO4J_CYPHER_QUERY = textwrap.dedent(
DEFAULT_NEO4J_TABLE_CYPHER_QUERY = textwrap.dedent(
"""
MATCH (db:Database)<-[:CLUSTER_OF]-(cluster:Cluster)<-[:SCHEMA_OF]-(schema:Schema)<-[:TABLE_OF]-(table:Table)
MATCH (db:Database)<-[:CLUSTER_OF]-(cluster:Cluster)
<-[:SCHEMA_OF]-(schema:Schema)<-[:TABLE_OF]-(table:Table)
{publish_tag_filter}
OPTIONAL MATCH (table)-[:DESCRIPTION]->(table_description:Description)
OPTIONAL MATCH (table)-[:TAGGED_BY]->(tags:Tag) WHERE tags.tag_type='default'
Expand Down Expand Up @@ -46,19 +48,68 @@ class Neo4jSearchDataExtractor(Extractor):
"""
)

# Default search query for user entities.
#
# Starts from every (user:User) node and optionally matches its outgoing READ,
# OWNER_OF, FOLLOWED_BY and MANAGE_BY relationships. Users whose full_name is
# null are dropped. Each returned row carries the user's profile fields, the
# manager's email, and three aggregates:
#   - total_read:   sum of read_count over the distinct READ relationships
#   - total_own:    number of distinct OWNER_OF targets
#   - total_follow: number of distinct FOLLOWED_BY targets
# Rows are ordered by user.email.
#
# `{publish_tag_filter}` is a str.format placeholder; it must be filled (possibly
# with an empty string) before the query is executed.
#
# NOTE(review): the placeholder sits directly after the MANAGE_BY OPTIONAL MATCH,
# so any WHERE clause substituted there would attach to that optional match. The
# filter built by _add_publish_tag_filter references `table.published_tag`, and
# `table` is not bound anywhere in this query -- looks like a non-empty publish
# tag would break this query; confirm before relying on it for users.
# NOTE(review): the `manage_by` relationship variable is bound but never used.
DEFAULT_NEO4J_USER_CYPHER_QUERY = textwrap.dedent(
"""
MATCH (user:User)
OPTIONAL MATCH (user)-[read:READ]->(a)
OPTIONAL MATCH (user)-[own:OWNER_OF]->(b)
OPTIONAL MATCH (user)-[follow:FOLLOWED_BY]->(c)
OPTIONAL MATCH (user)-[manage_by:MANAGE_BY]->(manager)
{publish_tag_filter}
with user, a, b, c, read, own, follow, manager
where user.full_name is not null
return user.email as email, user.first_name as first_name, user.last_name as last_name,
user.full_name as full_name, user.github_username as github_username, user.team_name as team_name,
user.employee_type as employee_type, manager.email as manager_email,
user.slack_id as slack_id, user.is_active as is_active,
REDUCE(sum_r = 0, r in COLLECT(DISTINCT read)| sum_r + r.read_count) AS total_read,
count(distinct b) as total_own,
count(distinct c) AS total_follow
order by user.email
"""
)

# Default search query for dashboard entities.
#
# Starts from every (db:Dashboard) node, optionally matches its Dashboardgroup
# and the Description nodes attached to the dashboard and to the group, and
# drops dashboards whose name is null. Rows are ordered by dashboard group name.
#
# `{publish_tag_filter}` is a str.format placeholder; it must be filled (possibly
# with an empty string) before the query is executed.
#
# The group description is read from the matched `dbg_descr` Description node,
# mirroring how the dashboard description is read from `db_descr`. Previously it
# was read from `dbg.description`, which left the matched `dbg_descr` unused.
#
# TODO: 1. replace the hard-coded `1 AS total_usage` placeholder once usage data is available;
#       2. add more fields once they exist in the graph;
#       3. make `product` generic (currently hard-coded to 'mode') once more dashboard
#          products are supported.
# NOTE(review): the placeholder sits directly after an OPTIONAL MATCH, and the filter
# built by _add_publish_tag_filter references `table.published_tag`; `table` is not
# bound in this query -- confirm behaviour with a non-empty publish tag.
DEFAULT_NEO4J_DASHBOARD_CYPHER_QUERY = textwrap.dedent(
"""
MATCH (db:Dashboard)
OPTIONAL MATCH (db)-[:DASHBOARD_OF]->(dbg:Dashboardgroup)
OPTIONAL MATCH (db)-[:DESCRIPTION]->(db_descr:Description)
OPTIONAL MATCH (dbg)-[:DESCRIPTION]->(dbg_descr:Description)
{publish_tag_filter}
with db, dbg, db_descr, dbg_descr
where db.name is not null
return dbg.name as dashboard_group, db.name as dashboard_name,
coalesce(db_descr.description, '') as description,
coalesce(dbg_descr.description, '') as dashboard_group_description,
'mode' as product,
1 AS total_usage
order by dbg.name
"""
)

# Lookup table from lower-case entity type name to the default cypher query
# used for that entity when no explicit query is configured.
# TODO: register further entity types here as they become supported.
DEFAULT_QUERY_BY_ENTITY = {
    'table': DEFAULT_NEO4J_TABLE_CYPHER_QUERY,
    'user': DEFAULT_NEO4J_USER_CYPHER_QUERY,
    'dashboard': DEFAULT_NEO4J_DASHBOARD_CYPHER_QUERY,
}

def init(self, conf):
# type: (ConfigTree) -> None
"""
Initialize Neo4jExtractor object from configuration and use that for extraction
"""
self.conf = conf

# extract cypher query from conf, if specified, else use default query
if Neo4jSearchDataExtractor.CYPHER_QUERY_CONFIG_KEY in conf:
self.cypher_query = conf.get_string(Neo4jSearchDataExtractor.CYPHER_QUERY_CONFIG_KEY)
else:
entity_type = conf.get_string(Neo4jSearchDataExtractor.ENTITY_TYPE, default='table').lower()
default_query = Neo4jSearchDataExtractor.DEFAULT_QUERY_BY_ENTITY[entity_type]
self.cypher_query = self._add_publish_tag_filter(conf.get_string(JOB_PUBLISH_TAG, ''),
Neo4jSearchDataExtractor.DEFAULT_NEO4J_CYPHER_QUERY)
cypher_query=default_query)

self.neo4j_extractor = Neo4jExtractor()
# write the cypher query in configs in Neo4jExtractor scope
Expand Down Expand Up @@ -98,5 +149,4 @@ def _add_publish_tag_filter(self, publish_tag, cypher_query):
publish_tag_filter = ''
else:
publish_tag_filter = """WHERE table.published_tag = '{}'""".format(publish_tag)

return cypher_query.format(publish_tag_filter=publish_tag_filter)
24 changes: 24 additions & 0 deletions tests/unit/extractor/test_neo4j_search_data_extractor.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
import unittest
from mock import patch

from pyhocon import ConfigFactory
from databuilder import Scoped
from databuilder.extractor.neo4j_extractor import Neo4jExtractor
from databuilder.extractor.neo4j_search_data_extractor import Neo4jSearchDataExtractor


Expand All @@ -18,6 +23,25 @@ def test_not_adding_filter(self):

self.assertEqual(actual, """MATCH (table:Table) RETURN table""")

def test_default_search_query(self):
    # type: (Any) -> None
    """
    When no explicit cypher query is configured, init() must pick the default
    query registered for the configured entity type (dashboard here), rendered
    with an empty publish tag filter.
    """
    neo4j_key = 'extractor.search_data.extractor.neo4j.{}'.format
    entity_type_key = 'extractor.search_data.{}'.format(Neo4jSearchDataExtractor.ENTITY_TYPE)
    expected_query = Neo4jSearchDataExtractor.DEFAULT_NEO4J_DASHBOARD_CYPHER_QUERY.format(
        publish_tag_filter='')

    # _get_driver is patched out for the duration of the test; presumably this
    # keeps init() from opening a real Neo4j connection.
    with patch.object(Neo4jExtractor, '_get_driver'):
        search_extractor = Neo4jSearchDataExtractor()
        job_conf = ConfigFactory.from_dict({
            neo4j_key(Neo4jExtractor.GRAPH_URL_CONFIG_KEY): 'test-endpoint',
            neo4j_key(Neo4jExtractor.NEO4J_AUTH_USER): 'test-user',
            neo4j_key(Neo4jExtractor.NEO4J_AUTH_PW): 'test-passwd',
            entity_type_key: 'dashboard',
        })
        scoped_conf = Scoped.get_scoped_conf(conf=job_conf,
                                             scope=search_extractor.get_scope())
        search_extractor.init(scoped_conf)

        self.assertEqual(search_extractor.cypher_query, expected_query)


if __name__ == '__main__':
unittest.main()