Skip to content

Commit

Permalink
apply projectStatus filter only if sequencingCenter is JGI in GOLD translator
Browse files (browse the repository at this point in the history)
  • Loading branch information
sujaypatil96 committed Jan 23, 2025
1 parent b4db574 commit 5633051
Showing 1 changed file with 25 additions and 7 deletions.
32 changes: 25 additions & 7 deletions nmdc_runtime/site/translation/gold_translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,29 @@
SEQUENCING_STRATEGIES = {"Metagenome", "Metatranscriptome"}


def _is_valid_project(project: dict) -> bool:
    """Decide whether a GOLD project passes the translator's project filter.

    A project is considered valid if:

    1. `sequencingStrategy` is in {"Metagenome", "Metatranscriptome"}.
    2. When the project's sequencing center is
       'DOE Joint Genome Institute (JGI)', `projectStatus` must be
       "Permanent Draft" or "Complete and Published".
    3. Otherwise, no `projectStatus` filter is applied.

    `sequencingCenters` is matched both as a bare string and as a
    list/tuple whose single entry is the JGI name. Comparing a list against
    the bare string is always False, which would silently disable rule 2.

    NOTE(review): the plural field name suggests GOLD returns a list here;
    confirm against the `/projects` response. Multi-center lists that merely
    include JGI are deliberately NOT status-filtered (same outcome as the
    plain string comparison) -- confirm that is the intended policy.

    :param project: GOLD project object (structurally similar to response
        from `/projects` endpoint)
    :return: True if the project is valid, False otherwise
    """
    jgi_center = "DOE Joint Genome Institute (JGI)"
    jgi_statuses = ("Permanent Draft", "Complete and Published")

    if project.get("sequencingStrategy") not in SEQUENCING_STRATEGIES:
        return False

    centers = project.get("sequencingCenters")
    if isinstance(centers, (list, tuple)):
        # List-shaped value: JGI must be the one and only center listed.
        sequenced_by_jgi = list(centers) == [jgi_center]
    else:
        # Bare string (or missing/None): plain equality, as before.
        sequenced_by_jgi = centers == jgi_center

    if sequenced_by_jgi:
        return project.get("projectStatus") in jgi_statuses

    # Non-JGI projects are accepted regardless of their status.
    return True


class GoldStudyTranslator(Translator):
def __init__(
self,
Expand All @@ -36,20 +59,15 @@ def __init__(
biosample
for biosample in biosamples
if any(
project.get("sequencingStrategy") in SEQUENCING_STRATEGIES
and project.get("projectStatus")
in ("Permanent Draft", "Complete and Published")
for project in biosample.get("projects", [])
_is_valid_project(project) for project in biosample.get("projects", [])
)
]
# Fetch the valid projectGoldIds that are associated with filtered
# biosamples on their `projects` field
valid_project_ids = {
project.get("projectGoldId")
for project in projects
if project.get("sequencingStrategy") in SEQUENCING_STRATEGIES
and project.get("projectStatus")
in ("Permanent Draft", "Complete and Published")
if _is_valid_project(project)
}
# Filter projects to only those with `projectGoldId` in valid_project_ids
self.projects = [
Expand Down

0 comments on commit 5633051

Please sign in to comment.