Skip to content

Commit

Permalink
functional annotation agg
Browse files Browse the repository at this point in the history
  • Loading branch information
hesspnnl committed Jan 31, 2025
1 parent 0c58aaf commit 53c91c4
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 0 deletions.
14 changes: 14 additions & 0 deletions nmdc_notebook_tools/functional_annotation_agg_search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# -*- coding: utf-8 -*-
from nmdc_notebook_tools.functional_search import FunctionalSearch
import logging

logger = logging.getLogger(__name__)


class FunctionalAnnotationAggSearch(FunctionalSearch):
    """
    Class to interact with the NMDC API to get functional annotation agg sets.

    These are most helpful when trying to identify workflows associated with
    KEGG, COG, or PFAM ids. This class defines no methods of its own beyond
    the constructor; everything else is inherited from FunctionalSearch.
    """

    def __init__(self):
        # Nothing extra to configure here: defer entirely to FunctionalSearch.
        super().__init__()
49 changes: 49 additions & 0 deletions nmdc_notebook_tools/functional_search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# -*- coding: utf-8 -*-

import json

from nmdc_notebook_tools.collection_search import CollectionSearch


class FunctionalSearch:
    """
    Class to interact with the NMDC API to filter functional annotations by KEGG, COG, or PFAM ids.
    """

    # Maps each accepted id_type to the prefix placed before the id in the
    # "gene_function_id" value that is sent as the filter.
    _ID_PREFIXES = {
        "KEGG": "KEGG.ORTHOLOGY",
        "COG": "COG",
        "PFAM": "PFAM",
    }

    def __init__(self):
        # Every query made through this class goes to this one collection.
        self.collectioninstance = CollectionSearch("functional_annotation_agg")

    def get_functional_annotation_id(
        self,
        id: str,
        id_type: str,
        page_size: int = 25,
        fields: str = "",
        all_pages: bool = False,
    ):
        """
        Get a record from the NMDC API by id. ID types can be KEGG, COG, or PFAM.
        params:
            id: str
                The database id to query the functional annotations with,
                without its prefix. Example: "K01426"
            id_type: str
                The type of id to query. MUST be one of the following:
                    KEGG
                    COG
                    PFAM
            page_size: int
                The number of results to return per page. Default is 25.
            fields: str
                The fields to return. Default is all fields.
                Example: "id,name"
            all_pages: bool
                True to return all pages. False to return the first page. Default is False.
        returns:
            Whatever CollectionSearch.get_record_by_filter returns for the
            filter {"gene_function_id": "<prefix>:<id>"}.
        raises:
            ValueError: if id_type is not one of KEGG, COG, or PFAM.
        """
        # The isinstance guard keeps unhashable values (e.g. a list) on the
        # ValueError path instead of failing inside the dict lookup.
        prefix = self._ID_PREFIXES.get(id_type) if isinstance(id_type, str) else None
        if prefix is None:
            raise ValueError("id_type must be one of the following: KEGG, COG, PFAM")

        # Serialise with json.dumps rather than string interpolation so that
        # quotes or backslashes in the id are escaped and the filter is
        # always valid JSON. For ordinary ids the text is unchanged.
        record_filter = json.dumps({"gene_function_id": f"{prefix}:{id}"})

        return self.collectioninstance.get_record_by_filter(
            record_filter, page_size, fields, all_pages
        )
4 changes: 4 additions & 0 deletions nmdc_notebook_tools/lat_long_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@


class LatLongFilters(CollectionSearch):
"""
Class to interact with the NMDC API to filter sets by latitude and longitude.
"""

def __init__(self, collection_name):
    """
    Set up the filter helper for a single collection.
    params:
        collection_name:
            The collection to query. It is stored on the instance as
            ``collection_name`` and then passed to the CollectionSearch
            initializer.
    """
    self.collection_name = collection_name
    super().__init__(self.collection_name)
Expand Down
20 changes: 20 additions & 0 deletions nmdc_notebook_tools/test/test_func_ann_agg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# -*- coding: utf-8 -*-
from nmdc_notebook_tools.functional_annotation_agg_search import (
FunctionalAnnotationAggSearch,
)
import logging
import unittest
from nmdc_notebook_tools.utils import Utils


class TestFunctionalAnnotation(unittest.TestCase):
    """Tests for functional annotation lookups via FunctionalAnnotationAggSearch."""

    def test_func_ann_id(self):
        """A KEGG lookup yields at least one record carrying the prefixed id."""
        searcher = FunctionalAnnotationAggSearch()
        records = searcher.get_functional_annotation_id("K01426", "KEGG")
        self.assertGreater(len(records), 0)
        first_record = records[0]
        self.assertEqual(first_record["gene_function_id"], "KEGG.ORTHOLOGY:K01426")

    def test_func_ann_id_fail(self):
        """An unrecognised id_type is rejected with ValueError."""
        searcher = FunctionalAnnotationAggSearch()
        with self.assertRaises(ValueError):
            searcher.get_functional_annotation_id("K01426", "nfjbg")

0 comments on commit 53c91c4

Please sign in to comment.