From 46e369f42f16ec025b0752c35466a5bc592c2fff Mon Sep 17 00:00:00 2001 From: naglepuff Date: Thu, 19 Dec 2024 12:36:14 -0500 Subject: [PATCH 1/2] Fix metap ingest --- nmdc_server/data_object_filters.py | 2 +- nmdc_server/ingest/all.py | 2 +- nmdc_server/ingest/pipeline.py | 4 +--- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/nmdc_server/data_object_filters.py b/nmdc_server/data_object_filters.py index d6b77ed2..0cd8c6d6 100644 --- a/nmdc_server/data_object_filters.py +++ b/nmdc_server/data_object_filters.py @@ -34,7 +34,7 @@ class WorkflowActivityTypeEnum(Enum): metabolomics_analysis = "nmdc:MetabolomicsAnalysis" metagenome_assembly = "nmdc:MetagenomeAssembly" metagenome_annotation = "nmdc:MetagenomeAnnotation" - metaproteomic_analysis = "nmdc:MetaproteomicAnalysis" + metaproteomic_analysis = "nmdc:MetaproteomicsAnalysis" metatranscriptome = "nmdc:MetatranscriptomeAnalysis" metatranscriptome_assembly = "nmdc:MetatranscriptomeAssembly" metatranscriptome_annotation = "nmdc:MetatranscriptomeAnnotation" diff --git a/nmdc_server/ingest/all.py b/nmdc_server/ingest/all.py index f3c79058..fa300799 100644 --- a/nmdc_server/ingest/all.py +++ b/nmdc_server/ingest/all.py @@ -220,7 +220,7 @@ def load(db: Session, function_limit=None, skip_annotation=False): pipeline.load( db, mongodb[workflow_set].find( - {"type": "nmdc:MetaproteomicsAnalysis"}, + {"type": WorkflowActivityTypeEnum.metaproteomic_analysis.value}, no_cursor_timeout=True, ), pipeline.load_mp_analysis, diff --git a/nmdc_server/ingest/pipeline.py b/nmdc_server/ingest/pipeline.py index c7039675..56182508 100644 --- a/nmdc_server/ingest/pipeline.py +++ b/nmdc_server/ingest/pipeline.py @@ -92,7 +92,6 @@ def load_mp_analysis(db: Session, obj: Dict[str, Any], **kwargs) -> LoadObjectRe "gene_function_id": { "$regex": gene_regex, }, - "best_protein": True, }, no_cursor_timeout=True, projection={ @@ -100,7 +99,6 @@ def load_mp_analysis(db: Session, obj: Dict[str, Any], **kwargs) -> LoadObjectRe "was_generated_by": True, "count": True, "gene_function_id": True, - "best_protein": True, }, ) if kwargs.get("function_limit"): @@ -116,7 +114,7 @@ def load_mp_analysis(db: Session, obj: Dict[str, Any], **kwargs) -> LoadObjectRe metaproteomic_analysis_id=pipeline.id, gene_function_id=function_id, count=annotation["count"], - best_protein=annotation["best_protein"], + best_protein=True, ) ) if metap_gene_function_aggregations: From 204bcd649f9cf4a4e5f741302abbe37a2d7eb759 Mon Sep 17 00:00:00 2001 From: naglepuff Date: Thu, 19 Dec 2024 14:37:17 -0500 Subject: [PATCH 2/2] Use gene query logic for all gene table values --- nmdc_server/query.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/nmdc_server/query.py b/nmdc_server/query.py index 4e19b946..9cdfe17d 100644 --- a/nmdc_server/query.py +++ b/nmdc_server/query.py @@ -384,7 +384,13 @@ def query(self, db) -> Query: # Gene function queries are treated differently because they join # in three different places (metaT, metaG and metaP). - if table == Table.gene_function: + if table in [ + Table.gene_function, + Table.kegg_function, + Table.go_function, + Table.pfam_function, + Table.cog_function, + ]: metag_matches = filter.matches(db, self.table) metap_conditions = [ SimpleConditionSchema(