From 8ff51e3a4afc1fcca4b46b5bff16077b076e8374 Mon Sep 17 00:00:00 2001 From: feng-tao Date: Wed, 11 Mar 2020 12:24:38 -0700 Subject: [PATCH] Add default cypher query for user/dashboard entity to search extractor --- .../extractor/neo4j_search_data_extractor.py | 60 +++++++++++++++++-- .../test_neo4j_search_data_extractor.py | 24 ++++++++ 2 files changed, 79 insertions(+), 5 deletions(-) diff --git a/databuilder/extractor/neo4j_search_data_extractor.py b/databuilder/extractor/neo4j_search_data_extractor.py index 1cc22e101..5e220cac1 100644 --- a/databuilder/extractor/neo4j_search_data_extractor.py +++ b/databuilder/extractor/neo4j_search_data_extractor.py @@ -15,10 +15,12 @@ class Neo4jSearchDataExtractor(Extractor): Use Neo4jExtractor extractor class """ CYPHER_QUERY_CONFIG_KEY = 'cypher_query' + ENTITY_TYPE = 'entity_type' - DEFAULT_NEO4J_CYPHER_QUERY = textwrap.dedent( + DEFAULT_NEO4J_TABLE_CYPHER_QUERY = textwrap.dedent( """ - MATCH (db:Database)<-[:CLUSTER_OF]-(cluster:Cluster)<-[:SCHEMA_OF]-(schema:Schema)<-[:TABLE_OF]-(table:Table) + MATCH (db:Database)<-[:CLUSTER_OF]-(cluster:Cluster) + <-[:SCHEMA_OF]-(schema:Schema)<-[:TABLE_OF]-(table:Table) {publish_tag_filter} OPTIONAL MATCH (table)-[:DESCRIPTION]->(table_description:Description) OPTIONAL MATCH (table)-[:TAGGED_BY]->(tags:Tag) WHERE tags.tag_type='default' @@ -46,19 +48,68 @@ class Neo4jSearchDataExtractor(Extractor): """ ) + DEFAULT_NEO4J_USER_CYPHER_QUERY = textwrap.dedent( + """ + MATCH (user:User) + OPTIONAL MATCH (user)-[read:READ]->(a) + OPTIONAL MATCH (user)-[own:OWNER_OF]->(b) + OPTIONAL MATCH (user)-[follow:FOLLOWED_BY]->(c) + OPTIONAL MATCH (user)-[manage_by:MANAGE_BY]->(manager) + {publish_tag_filter} + with user, a, b, c, read, own, follow, manager + where user.full_name is not null + return user.email as email, user.first_name as first_name, user.last_name as last_name, + user.full_name as full_name, user.github_username as github_username, user.team_name as team_name, + user.employee_type as employee_type, manager.email as manager_email, + user.slack_id as slack_id, user.is_active as is_active, + REDUCE(sum_r = 0, r in COLLECT(DISTINCT read)| sum_r + r.read_count) AS total_read, + count(distinct b) as total_own, + count(distinct c) AS total_follow + order by user.email + """ + ) + + # todo: 1. change total_read once we have the usage; + # 2. add more fields once we have in the graph; 3. change mode to generic once add more support for dashboard + DEFAULT_NEO4J_DASHBOARD_CYPHER_QUERY = textwrap.dedent( + """ + MATCH (db:Dashboard) + OPTIONAL MATCH (db)-[:DASHBOARD_OF]->(dbg:Dashboardgroup) + OPTIONAL MATCH (db)-[:DESCRIPTION]->(db_descr:Description) + OPTIONAL MATCH (dbg)-[:DESCRIPTION]->(dbg_descr:Description) + {publish_tag_filter} + with db, dbg, db_descr, dbg_descr + where db.name is not null + return dbg.name as dashboard_group, db.name as dashboard_name, + coalesce(db_descr.description, '') as description, + coalesce(dbg.description, '') as dashboard_group_description, + 'mode' as product, + 1 AS total_usage + order by dbg.name + """ + ) + + # todo: we will add more once we add more entities + DEFAULT_QUERY_BY_ENTITY = { + 'table': DEFAULT_NEO4J_TABLE_CYPHER_QUERY, + 'user': DEFAULT_NEO4J_USER_CYPHER_QUERY, + 'dashboard': DEFAULT_NEO4J_DASHBOARD_CYPHER_QUERY + } + def init(self, conf): # type: (ConfigTree) -> None """ Initialize Neo4jExtractor object from configuration and use that for extraction """ self.conf = conf - # extract cypher query from conf, if specified, else use default query if Neo4jSearchDataExtractor.CYPHER_QUERY_CONFIG_KEY in conf: self.cypher_query = conf.get_string(Neo4jSearchDataExtractor.CYPHER_QUERY_CONFIG_KEY) else: + entity_type = conf.get_string(Neo4jSearchDataExtractor.ENTITY_TYPE, default='table').lower() + default_query = Neo4jSearchDataExtractor.DEFAULT_QUERY_BY_ENTITY[entity_type] self.cypher_query = self._add_publish_tag_filter(conf.get_string(JOB_PUBLISH_TAG, ''), - Neo4jSearchDataExtractor.DEFAULT_NEO4J_CYPHER_QUERY) + cypher_query=default_query) self.neo4j_extractor = Neo4jExtractor() # write the cypher query in configs in Neo4jExtractor scope @@ -98,5 +149,4 @@ def _add_publish_tag_filter(self, publish_tag, cypher_query): publish_tag_filter = '' else: publish_tag_filter = """WHERE table.published_tag = '{}'""".format(publish_tag) - return cypher_query.format(publish_tag_filter=publish_tag_filter) diff --git a/tests/unit/extractor/test_neo4j_search_data_extractor.py b/tests/unit/extractor/test_neo4j_search_data_extractor.py index 31f021181..3803aad0b 100644 --- a/tests/unit/extractor/test_neo4j_search_data_extractor.py +++ b/tests/unit/extractor/test_neo4j_search_data_extractor.py @@ -1,4 +1,9 @@ import unittest +from mock import patch + +from pyhocon import ConfigFactory +from databuilder import Scoped +from databuilder.extractor.neo4j_extractor import Neo4jExtractor from databuilder.extractor.neo4j_search_data_extractor import Neo4jSearchDataExtractor @@ -18,6 +23,25 @@ def test_not_adding_filter(self): self.assertEqual(actual, """MATCH (table:Table) RETURN table""") + def test_default_search_query(self): + # type: (Any) -> None + with patch.object(Neo4jExtractor, '_get_driver'): + extractor = Neo4jSearchDataExtractor() + conf = ConfigFactory.from_dict({ + 'extractor.search_data.extractor.neo4j.{}'.format(Neo4jExtractor.GRAPH_URL_CONFIG_KEY): + 'test-endpoint', + 'extractor.search_data.extractor.neo4j.{}'.format(Neo4jExtractor.NEO4J_AUTH_USER): + 'test-user', + 'extractor.search_data.extractor.neo4j.{}'.format(Neo4jExtractor.NEO4J_AUTH_PW): + 'test-passwd', + 'extractor.search_data.{}'.format(Neo4jSearchDataExtractor.ENTITY_TYPE): + 'dashboard', + }) + extractor.init(Scoped.get_scoped_conf(conf=conf, + scope=extractor.get_scope())) + self.assertEqual(extractor.cypher_query, Neo4jSearchDataExtractor + .DEFAULT_NEO4J_DASHBOARD_CYPHER_QUERY.format(publish_tag_filter='')) + if __name__ == '__main__': unittest.main()