Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Apply projectStatus filter only if sequencingCenter is JGI in GOLD translator #881

Merged
merged 1 commit into from
Jan 24, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 25 additions & 7 deletions nmdc_runtime/site/translation/gold_translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,29 @@
SEQUENCING_STRATEGIES = {"Metagenome", "Metatranscriptome"}

# Sequencing center whose projects are additionally filtered on `projectStatus`.
_JGI_SEQUENCING_CENTER = "DOE Joint Genome Institute (JGI)"

# Statuses a JGI-sequenced project must have in order to be kept.
_JGI_VALID_PROJECT_STATUSES = ("Permanent Draft", "Complete and Published")


def _is_valid_project(project: dict) -> bool:
    """Return whether a GOLD project passes the translator's project filter.

    A project is considered valid if:

    1. `sequencingStrategy` is in {"Metagenome", "Metatranscriptome"}
    2. if `sequencingCenters` names only 'DOE Joint Genome Institute (JGI)'
       (given either as a plain string or as a one-element list) then
       `projectStatus` must be in ("Permanent Draft", "Complete and Published")
    3. otherwise, no `projectStatus` filter is applied

    NOTE(review): the plural field name suggests GOLD may deliver
    `sequencingCenters` as a list; both shapes are accepted here. A list
    naming several centers is left unfiltered -- confirm that this is the
    intended rule.

    :param project: GOLD project object (structurally similar to response
      from `/projects` endpoint)
    :return: True if the project is valid, False otherwise
    """
    if project.get("sequencingStrategy") not in SEQUENCING_STRATEGIES:
        return False

    # Normalise the string form to a one-element list so that a single
    # comparison covers both shapes; a missing field stays None and never
    # matches.
    centers = project.get("sequencingCenters")
    if isinstance(centers, str):
        centers = [centers]

    if centers == [_JGI_SEQUENCING_CENTER]:
        return project.get("projectStatus") in _JGI_VALID_PROJECT_STATUSES

    return True


class GoldStudyTranslator(Translator):
def __init__(
self,
Expand All @@ -36,20 +59,15 @@ def __init__(
biosample
for biosample in biosamples
if any(
project.get("sequencingStrategy") in SEQUENCING_STRATEGIES
and project.get("projectStatus")
in ("Permanent Draft", "Complete and Published")
for project in biosample.get("projects", [])
_is_valid_project(project) for project in biosample.get("projects", [])
)
]
# Fetch the valid projectGoldIds that are associated with filtered
# biosamples on their `projects` field
valid_project_ids = {
project.get("projectGoldId")
for project in projects
if project.get("sequencingStrategy") in SEQUENCING_STRATEGIES
and project.get("projectStatus")
in ("Permanent Draft", "Complete and Published")
if _is_valid_project(project)
}
# Filter projects to only those with `projectGoldId` in valid_project_ids
self.projects = [
Expand Down
Loading