Skip to content

Commit

Permalink
Merge pull request #12 from microbiomedata/10-functional-annotation-agg
Browse files Browse the repository at this point in the history
10 functional annotation agg
  • Loading branch information
hesspnnl authored Feb 3, 2025
2 parents 0c58aaf + 1aaa9e8 commit 8cd9024
Show file tree
Hide file tree
Showing 5 changed files with 125 additions and 0 deletions.
14 changes: 14 additions & 0 deletions docs/functions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,13 @@ Latitude Longitude Module
:undoc-members:
:show-inheritance:

Functional Search Module
~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: nmdc_notebook_tools.functional_search.FunctionalSearch
:members:
:undoc-members:
:show-inheritance:

Collection Helpers
~~~~~~~~~~~~~~~~~~
.. autoclass:: nmdc_notebook_tools.collection_helpers.CollectionHelpers
Expand Down Expand Up @@ -138,6 +145,13 @@ Study Subclass
:undoc-members:
:show-inheritance:

Functional Annotation Agg Subclass
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: nmdc_notebook_tools.functional_annotation_agg_search.FunctionalAnnotationAggSearch
:members:
:undoc-members:
:show-inheritance:

Workflow Execution Subclass
~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: nmdc_notebook_tools.workflow_execution_search.WorkflowExecutionSearch
Expand Down
14 changes: 14 additions & 0 deletions nmdc_notebook_tools/functional_annotation_agg_search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# -*- coding: utf-8 -*-
from nmdc_notebook_tools.functional_search import FunctionalSearch
import logging

logger = logging.getLogger(__name__)


class FunctionalAnnotationAggSearch(FunctionalSearch):
    """
    Class to interact with the NMDC API to get functional annotation agg sets.

    These are most helpful when trying to identify workflows associated with
    KEGG, COG, or PFAM ids.
    """

    def __init__(self):
        """Initialize the search by delegating to the FunctionalSearch initializer."""
        super().__init__()
73 changes: 73 additions & 0 deletions nmdc_notebook_tools/functional_search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# -*- coding: utf-8 -*-

from nmdc_notebook_tools.collection_search import CollectionSearch


class FunctionalSearch:
    """
    Class to interact with the NMDC API to filter functional annotations by KEGG, COG, or PFAM ids.

    All queries are delegated to a ``CollectionSearch`` instance bound to the
    ``functional_annotation_agg`` collection.
    """

    def __init__(self):
        # Every lookup made through this class targets the same collection.
        self.collectioninstance = CollectionSearch("functional_annotation_agg")

    def get_functional_annotations(
        self,
        annotation: str,
        annotation_type: str,
        page_size=25,
        fields="",
        all_pages=False,
    ):
        """
        Get functional annotation records from the NMDC API by id. ID types can be KEGG, COG, or PFAM.
        params:
            annotation: str
                The data base id to query the function annotations.
                Example: "K01426"
            annotation_type: str
                The type of id to query. MUST be one of the following:
                    KEGG
                    COG
                    PFAM
            page_size: int
                The number of results to return per page. Default is 25.
            fields: str
                The fields to return. Default is all fields.
                Example: "id,name"
            all_pages: bool
                True to return all pages. False to return the first page. Default is False.
        returns:
            Whatever ``CollectionSearch.get_record_by_filter`` returns for the
            generated ``gene_function_id`` filter.
        raises:
            ValueError: if annotation_type is not one of KEGG, COG, PFAM.
        """
        # Function-scope import keeps this block self-contained.
        import json

        # Maps the user-facing id type to the prefix used in gene_function_id.
        prefixes = {"KEGG": "KEGG.ORTHOLOGY", "COG": "COG", "PFAM": "PFAM"}
        if not isinstance(annotation_type, str) or annotation_type not in prefixes:
            raise ValueError(
                "annotation_type must be one of the following: KEGG, COG, PFAM"
            )

        gene_function_id = f"{prefixes[annotation_type]}:{annotation}"
        # json.dumps escapes quotes/backslashes so the filter is always valid
        # JSON; for ordinary ids the text is identical to the hand-built form.
        query = json.dumps({"gene_function_id": gene_function_id})

        return self.collectioninstance.get_record_by_filter(
            query, page_size, fields, all_pages
        )

    def get_records(
        self,
        filter: str = "",
        max_page_size: int = 100,
        fields: str = "",
        all_pages: bool = False,
    ):
        """
        Get a collection of data from the NMDC API. Generic function to get a collection of data from the NMDC API. Can provide a specific filter if desired.
        params:
            filter: str
                The filter to apply to the query. Default is an empty string.
            max_page_size: int
                The maximum number of items to return per page. Default is 100.
            fields: str
                The fields to return. Default is all fields.
            all_pages: bool
                True to return all pages. False to return the first page. Default is False.
        returns:
            Whatever ``CollectionSearch.get_records`` returns for the query.
        """
        # Hand the result back to the caller instead of discarding it.
        return self.collectioninstance.get_records(
            filter, max_page_size, fields, all_pages
        )
4 changes: 4 additions & 0 deletions nmdc_notebook_tools/lat_long_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@


class LatLongFilters(CollectionSearch):
"""
Class to interact with the NMDC API to filter sets by latitude and longitude.
"""

def __init__(self, collection_name):
    """
    Store the collection name and initialize the parent class with it.
    params:
        collection_name:
            The name of the collection; kept on the instance and passed
            to the parent initializer.
    """
    # Assigned before the parent initializer runs; presumably the parent
    # also records it -- TODO confirm against CollectionSearch.
    self.collection_name = collection_name
    super().__init__(self.collection_name)
Expand Down
20 changes: 20 additions & 0 deletions nmdc_notebook_tools/test/test_func_ann_agg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# -*- coding: utf-8 -*-
from nmdc_notebook_tools.functional_annotation_agg_search import (
FunctionalAnnotationAggSearch,
)
import logging
import unittest
from nmdc_notebook_tools.utils import Utils


class TestFunctionalAnnotation(unittest.TestCase):
    """Tests for FunctionalAnnotationAggSearch.get_functional_annotations."""

    def test_func_ann_id(self):
        """A KEGG lookup returns at least one record carrying the prefixed id."""
        searcher = FunctionalAnnotationAggSearch()
        records = searcher.get_functional_annotations("K01426", "KEGG")
        self.assertGreater(len(records), 0)
        first_record = records[0]
        self.assertEqual(first_record["gene_function_id"], "KEGG.ORTHOLOGY:K01426")

    def test_func_ann_id_fail(self):
        """An unsupported annotation type is rejected with ValueError."""
        searcher = FunctionalAnnotationAggSearch()
        self.assertRaises(
            ValueError, searcher.get_functional_annotations, "K01426", "nfjbg"
        )

0 comments on commit 8cd9024

Please sign in to comment.