Release/2.9.1 (#205)
* Bumping version

* support for extracting dug elements from graph (#197)

* support for extracting dug elements from graph

* adding flag for enabling dug element extraction from graph

* adding new config for node-to-dug-element parsing

* adding more parameters to crawler to enable configuration of the element extraction logic

* add tests

* add tests for crawler

Co-authored-by: Yaphetkg <[email protected]>

* Display ES scores (#199)

* Include ES scores in variable results

* Round ES score to 6 decimal places

* Update _version.py (#200)

* Dev version bump (#202)

* Release/2.8.0 (#198)

* Update _version.py

* Update _version.py

updating version for final push to master

* Update factory.py

Adding more comments

Co-authored-by: Carl Schreep <[email protected]>
Co-authored-by: Yaphetkg <[email protected]>

* Release/v2.9.0 (#201)

* Update _version.py

Co-authored-by: Carl Schreep <[email protected]>
Co-authored-by: Yaphetkg <[email protected]>
Co-authored-by: Ginnie Hench <[email protected]>

* Attribute mapping from node to dug element (#203)

* adding more config options for node extraction

* some refactoring

Co-authored-by: Carl Schreep <[email protected]>
Co-authored-by: Yaphetkg <[email protected]>
Co-authored-by: Ginnie Hench <[email protected]>

* Changed DbGaP to SPARC in the scicrunch parser (#204)

* Release/2.9.1

Labels SPARC dataset elements as "SPARC" instead of "DbGaP"

Co-authored-by: Carl Schreep <[email protected]>
Co-authored-by: Yaphetkg <[email protected]>
Co-authored-by: Ginnie Hench <[email protected]>
Co-authored-by: Howard Lander <[email protected]>
5 people authored Apr 11, 2022
1 parent bb661be commit 293fe16
Showing 7 changed files with 67 additions and 22 deletions.
2 changes: 1 addition & 1 deletion src/dug/_version.py
@@ -1 +1 @@
-__version__ = "2.9.0"
+__version__ = "2.9.1rc"
10 changes: 9 additions & 1 deletion src/dug/config.py
@@ -67,7 +67,15 @@ class Config:
             # Dug element type to cast the query kg nodes to
             "cde": {
                 # Parse nodes matching criteria in kg
-                "node_type": "biolink:Publication"
+                "node_type": "biolink:Publication",
+                "curie_prefix": "HEALCDE",
+                "attribute_mapping": {
+                    # "DugElement Attribute" : "KG Node attribute"
+                    "name": "name",
+                    "desc": "summary",
+                    "collection_name": "cde_category",
+                    "collection_id": "cde_category"
+                }
             }
         })
 
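A quick illustration of the contract this new block defines (a minimal sketch; the kg_node dict below is invented for illustration, not taken from the repo): attribute_mapping keys name DugElement attributes, and the values name the knowledge-graph node attributes they are filled from, with "" as the fallback.

# Minimal sketch of the attribute_mapping contract; kg_node is hypothetical data.
attribute_mapping = {
    "name": "name",
    "desc": "summary",
    "collection_name": "cde_category",
    "collection_id": "cde_category",
}
kg_node = {
    "id": "HEALCDE:0001",
    "name": "Photosensitivity Assessment Questionnaire (PAQ)",
    "summary": "Photosensitivity Assessment Questionnaire",
    "cde_category": "Supplemental Questionnaires",
}
# DugElement attribute <- mapped KG node attribute, defaulting to "" when absent.
element_kwargs = {key: kg_node.get(attribute_mapping[key], "") for key in attribute_mapping}
print(element_kwargs["collection_id"])  # Supplemental Questionnaires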
22 changes: 12 additions & 10 deletions src/dug/core/crawler.py
@@ -206,6 +206,8 @@ def expand_to_dug_element(self,
         elements = []
         # using node_type as the primary criteria for matching nodes to element type.
         target_node_type = casting_config["node_type"]
+        curie_filter = casting_config["curie_prefix"]
+        attribute_mapping = casting_config["attribute_mapping"]
         target_node_type_snake_case = biolink_snake_case(target_node_type.replace("biolink:", ""))
         for ident_id, identifier in concept.identifiers.items():
 
@@ -244,14 +246,14 @@ def expand_to_dug_element(self,
             # and return the variables.
             for node_id, node in answer.nodes.items():
                 if target_node_type in node["category"]:
-                    # @TODO make element creation more generic
-                    # @TODO need to encode more data into the graph nodes, to parse them properly
-                    element = DugElement(
-                        elem_id=node_id,
-                        name=node.get('name', ""),
-                        desc=node.get('summary', ""),
-                        elem_type=dug_element_type
-                    )
-                    element.add_concept(concept)
-                    elements.append(element)
+                    if node['id'].startswith(curie_filter):
+                        element_attribute_args = {"elem_id": node_id, "elem_type": dug_element_type}
+                        element_attribute_args.update({key: node.get(attribute_mapping[key], "")
+                                                       for key in attribute_mapping
+                                                       })
+                        element = DugElement(
+                            **element_attribute_args
+                        )
+                        element.add_concept(concept)
+                        elements.append(element)
         return elements
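Paraphrased as a standalone sketch (the dataclass below is a stand-in for dug's DugElement, and the node data is invented): a node is cast only when it carries the target category and its CURIE starts with the configured prefix, and its fields are then filled via the attribute mapping.

from dataclasses import dataclass

@dataclass
class FakeElement:  # stand-in for dug's DugElement, for illustration only
    elem_id: str
    elem_type: str
    name: str = ""
    desc: str = ""
    collection_name: str = ""
    collection_id: str = ""

casting_config = {
    "node_type": "biolink:Publication",
    "curie_prefix": "HEALCDE",
    "attribute_mapping": {"name": "name", "desc": "summary",
                          "collection_name": "cde_category",
                          "collection_id": "cde_category"},
}

# Hypothetical answer nodes: one HEALCDE publication, one non-matching publication.
nodes = {
    "HEALCDE:0001": {"id": "HEALCDE:0001", "category": ["biolink:Publication"],
                     "name": "PAQ", "summary": "Photosensitivity questionnaire",
                     "cde_category": "Supplemental Questionnaires"},
    "PMID:123": {"id": "PMID:123", "category": ["biolink:Publication"], "name": "paper"},
}

elements = []
for node_id, node in nodes.items():
    if (casting_config["node_type"] in node["category"]
            and node["id"].startswith(casting_config["curie_prefix"])):
        kwargs = {"elem_id": node_id, "elem_type": "cde"}
        kwargs.update({k: node.get(v, "") for k, v in casting_config["attribute_mapping"].items()})
        elements.append(FakeElement(**kwargs))

print([e.elem_id for e in elements])  # only the HEALCDE-prefixed node survives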
6 changes: 4 additions & 2 deletions src/dug/core/factory.py
@@ -81,7 +81,7 @@ def build_search_obj(self, indices) -> Search:
         return Search(self.config, indices=indices)
 
     def build_element_extraction_parameters(self, source=None):
-        # Method reformats the node_to_element_queries object
+        # Method reformats the node_to_element_queries object
         # Uses tranql source use for concept crawling
         if source is None:
             source = TRANQL_SOURCE
@@ -96,7 +96,9 @@ def build_element_extraction_parameters(self, source=None):
             {
                 "output_dug_type": dug_type,
                 "casting_config": {
-                    "node_type": queries[dug_type]['node_type']
+                    "node_type": queries[dug_type]["node_type"],
+                    "curie_prefix": queries[dug_type]["curie_prefix"],
+                    "attribute_mapping": queries[dug_type]["attribute_mapping"]
                     # CDE's are only ones
                     # but if we had two biolink:Publication nodes we want to conditionally
                     # cast to other output_dug_type, we could extend this config
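Note that curie_prefix and attribute_mapping are now indexed directly, so every node_to_element_queries entry must define them or this method raises a KeyError. A hedged sketch of the casting_config assembled for the "cde" query (values copied from the config defaults above; any other keys in the real entry are omitted):

# Hedged sketch: the casting_config built for the "cde" query, assuming the
# node_to_element_queries defaults in src/dug/config.py. Illustrative only.
casting_config = {
    "node_type": "biolink:Publication",
    "curie_prefix": "HEALCDE",
    "attribute_mapping": {
        "name": "name",
        "desc": "summary",
        "collection_name": "cde_category",
        "collection_id": "cde_category",
    },
}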
2 changes: 1 addition & 1 deletion src/dug/core/parsers/scicrunch_parser.py
@@ -68,7 +68,7 @@ def __call__(self, input_file: InputFile) -> List[Indexable]:
             elem = DugElement(elem_id=f"{variable.attrib['id']}.p{participant_set}",
                               name=variable.find('name').text,
                               desc=variable.find('description').text.lower(),
-                              elem_type="DbGaP",
+                              elem_type="SPARC",
                               collection_id=f"{study_id}.p{participant_set}",
                               collection_name=study_name)
 
36 changes: 30 additions & 6 deletions tests/unit/mocks/data/tranql_response.json
@@ -8,14 +8,14 @@
         }
       },
       "nodes": {
-        "publication": {
-          "category": "biolink:Publication"
-        },
         "disease": {
           "category": "biolink:Disease",
           "id": [
            "MONDO:0008187"
          ]
        },
+        "publication": {
+          "category": "biolink:Publication"
+        }
      }
    },
@@ -25,11 +25,11 @@
        "name": "panic disorder 1",
        "category": [
          "biolink:Disease",
-         "biolink:Entity",
+         "biolink:DiseaseOrPhenotypicFeature",
          "biolink:ThingWithTaxon",
          "biolink:BiologicalEntity",
-         "biolink:NamedThing",
-         "biolink:DiseaseOrPhenotypicFeature"
+         "biolink:Entity",
+         "biolink:NamedThing"
        ],
        "attributes": [
          {
@@ -72,6 +72,30 @@
           "value": "Filename: Photosensitivity_PAQ_CDE_v1.0.json; File path: Supplemental Questionnaires/Sensory/Photosensitivity Assessment Questionnaire (PAQ); Photosensitivity Assessment Questionnaire",
           "name": "summary"
         },
+        {
+          "type": "NA",
+          "value": [
+            "Supplemental Questionnaires",
+            "Sensory",
+            "Photosensitivity Assessment Questionnaire (PAQ)"
+          ],
+          "name": "cde_categories"
+        },
+        {
+          "type": "NA",
+          "value": [
+            "Supplemental Questionnaires",
+            "Adult/Pediatric",
+            "Acute/Chronic Pain",
+            "Photosensitivity Assessment Questionnaire (PAQ)"
+          ],
+          "name": "cde_category_extended"
+        },
+        {
+          "type": "NA",
+          "value": "Supplemental Questionnaires",
+          "name": "cde_category"
+        },
         {
           "type": "NA",
           "value": [
11 changes: 10 additions & 1 deletion tests/unit/test_crawler.py
@@ -84,7 +84,16 @@ def test_expand_to_dug_element(crawler):
     concept.add_identifier(identifier)
     new_elements = crawler.expand_to_dug_element(
         concept=concept,
-        casting_config={"node_type": "biolink:Publication"},
+        casting_config={
+            "node_type": "biolink:Publication",
+            "curie_prefix": "HEALCDE",
+            "attribute_mapping": {
+                "name": "name",
+                "desc": "summary",
+                "collection_name": "cde_category",
+                "collection_id": "cde_category"
+            }
+        },
         dug_element_type="test-element",
         tranql_source="test:graph"
     )
