diff --git a/nmdc_notebook_tools/functional_annotation_agg_search.py b/nmdc_notebook_tools/functional_annotation_agg_search.py
new file mode 100644
index 0000000..6bc4cea
--- /dev/null
+++ b/nmdc_notebook_tools/functional_annotation_agg_search.py
@@ -0,0 +1,15 @@
+# -*- coding: utf-8 -*-
+import logging
+
+from nmdc_notebook_tools.functional_search import FunctionalSearch
+
+logger = logging.getLogger(__name__)
+
+
+class FunctionalAnnotationAggSearch(FunctionalSearch):
+    """
+    Class to interact with the NMDC API to get functional annotation agg sets. These are most helpful when trying to identify workflows associated with KEGG, COG, or PFAM ids.
+    """
+
+    def __init__(self):
+        super().__init__()
diff --git a/nmdc_notebook_tools/functional_search.py b/nmdc_notebook_tools/functional_search.py
new file mode 100644
index 0000000..02e6938
--- /dev/null
+++ b/nmdc_notebook_tools/functional_search.py
@@ -0,0 +1,65 @@
+# -*- coding: utf-8 -*-
+import json
+
+from nmdc_notebook_tools.collection_search import CollectionSearch
+
+
+class FunctionalSearch:
+    """
+    Class to interact with the NMDC API to filter functional annotations by KEGG, COG, or PFAM ids.
+    """
+
+    # Maps each supported id_type to the prefix used in "gene_function_id"
+    # values, e.g. "KEGG.ORTHOLOGY:K01426".
+    _ID_PREFIXES = {
+        "KEGG": "KEGG.ORTHOLOGY",
+        "COG": "COG",
+        "PFAM": "PFAM",
+    }
+
+    def __init__(self):
+        self.collectioninstance = CollectionSearch("functional_annotation_agg")
+
+    def get_functional_annotation_id(
+        self,
+        id: str,
+        id_type: str,
+        page_size: int = 25,
+        fields: str = "",
+        all_pages: bool = False,
+    ):
+        """
+        Get records from the NMDC API by functional annotation id. ID types can be KEGG, COG, or PFAM.
+        params:
+            id: str
+                The database id used to query the functional annotations.
+            id_type: str
+                The type of id to query. MUST be one of the following:
+                    KEGG
+                    COG
+                    PFAM
+            page_size: int
+                The number of results to return per page. Default is 25.
+            fields: str
+                The fields to return. Default is all fields.
+                Example: "id,name"
+            all_pages: bool
+                True to return all pages. False to return the first page. Default is False.
+        returns:
+            The result of CollectionSearch.get_record_by_filter for the generated filter.
+        raises:
+            ValueError: If id_type is not one of KEGG, COG, or PFAM.
+        """
+        # Non-string id_type values (including unhashable ones) are rejected the
+        # same way as unknown strings.
+        prefix = self._ID_PREFIXES.get(id_type) if isinstance(id_type, str) else None
+        if prefix is None:
+            raise ValueError("id_type must be one of the following: KEGG, COG, PFAM")
+
+        # Serialize with json.dumps so quotes or backslashes in id are escaped
+        # and the filter is always valid JSON.
+        record_filter = json.dumps({"gene_function_id": f"{prefix}:{id}"})
+
+        return self.collectioninstance.get_record_by_filter(
+            record_filter, page_size, fields, all_pages
+        )
diff --git a/nmdc_notebook_tools/lat_long_filters.py b/nmdc_notebook_tools/lat_long_filters.py
index 8fdf4b5..d34bd50 100644
--- a/nmdc_notebook_tools/lat_long_filters.py
+++ b/nmdc_notebook_tools/lat_long_filters.py
@@ -6,6 +6,10 @@
 
 
 class LatLongFilters(CollectionSearch):
+    """
+    Class to interact with the NMDC API to filter sets by latitude and longitude.
+    """
+
     def __init__(self, collection_name):
         self.collection_name = collection_name
         super().__init__(self.collection_name)
diff --git a/nmdc_notebook_tools/test/test_func_ann_agg.py b/nmdc_notebook_tools/test/test_func_ann_agg.py
new file mode 100644
index 0000000..adf7f26
--- /dev/null
+++ b/nmdc_notebook_tools/test/test_func_ann_agg.py
@@ -0,0 +1,20 @@
+# -*- coding: utf-8 -*-
+from nmdc_notebook_tools.functional_annotation_agg_search import (
+    FunctionalAnnotationAggSearch,
+)
+import logging
+import unittest
+from nmdc_notebook_tools.utils import Utils
+
+
+class TestFunctionalAnnotation(unittest.TestCase):
+    def test_func_ann_id(self):
+        fannagg = FunctionalAnnotationAggSearch()
+        results = fannagg.get_functional_annotation_id("K01426", "KEGG")
+        self.assertGreater(len(results), 0)
+        self.assertEqual(results[0]["gene_function_id"], "KEGG.ORTHOLOGY:K01426")
+
+    def test_func_ann_id_fail(self):
+        fannagg = FunctionalAnnotationAggSearch()
+        with self.assertRaises(ValueError):
+            fannagg.get_functional_annotation_id("K01426", "nfjbg")