Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Retrieving module information of input and output and EDAM ontology from bio.tools #3418

Draft
wants to merge 10 commits into
base: dev
Choose a base branch
from
80 changes: 67 additions & 13 deletions nf_core/components/components_utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging
import re
from pathlib import Path
from typing import TYPE_CHECKING, List, Optional, Tuple, Union
from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union

import questionary
import requests
Expand Down Expand Up @@ -165,9 +165,9 @@ def get_components_to_install(subworkflow_dir: Union[str, Path]) -> Tuple[List[s
return modules, subworkflows


def get_biotools_id(tool_name) -> str:
def get_biotools_response(tool_name: str) -> Optional[Dict]:
"""
Try to find a bio.tools ID for 'tool'
Try to get bio.tools information for 'tool'
"""
url = f"https://bio.tools/api/t/?q={tool_name}&format=json"
try:
Expand All @@ -176,16 +176,70 @@ def get_biotools_id(tool_name) -> str:
response.raise_for_status() # Raise an error for bad status codes
# Parse the JSON response
data = response.json()
return data

# Iterate through the tools in the response to find the tool name
for tool in data["list"]:
if tool["name"].lower() == tool_name:
return tool["biotoolsCURIE"]
except requests.exceptions.RequestException as e:
log.warning(f"Could not find bio.tools information for '{tool_name}': {e}")
return None

# If the tool name was not found in the response
log.warning(f"Could not find a bio.tools ID for '{tool_name}'")
return ""

except requests.exceptions.RequestException as e:
log.warning(f"Could not find a bio.tools ID for '{tool_name}': {e}")
return ""
def get_biotools_id(data: Optional[dict], tool_name: str) -> str:
    """
    Try to find a bio.tools ID for 'tool'

    Args:
        data (dict): The bio.tools API response. May be None (or lack a "list"
            entry) when no response could be retrieved; no ID is found then.
        tool_name (str): The name of the tool, matched case-insensitively.

    Returns:
        str: The "biotoolsCURIE" of the first entry whose name matches,
            or "" when there is no match.
    """
    wanted = tool_name.lower()
    # Guard against a missing response before indexing: callers may hand over
    # the result of a failed bio.tools request, which is None.
    for tool in (data or {}).get("list", []):
        # Lowercase both sides so that e.g. "FastQC" and "fastqc" match
        if tool["name"].lower() == wanted:
            return tool["biotoolsCURIE"]

    # If the tool name was not found in the response
    log.warning(f"Could not find a bio.tools ID for '{tool_name}'")
    return ""
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not sure if it isn't better to always return None if we don't get a correct entry, like we do in the function above


# Maps a channel name to a tuple of (EDAM URIs, comma-separated format terms).
# Declared as a plain assignment: the `type X = ...` statement is only valid
# syntax from Python 3.12 onwards.
DictWithListAndStr = Dict[str, Tuple[List[str], str]]


def get_channel_info_from_biotools(
    data: Optional[dict], tool_name: str
) -> Optional[Tuple[DictWithListAndStr, DictWithListAndStr]]:
    """
    Try to find input and output channels and the respective EDAM ontology terms

    Args:
        data (dict): The bio.tools API response. May be None (or lack a "list"
            entry) when no response could be retrieved.
        tool_name (str): The name of the tool, matched case-insensitively.

    Returns:
        A tuple ``(inputs, outputs)`` for the first entry whose name matches.
        Each dict maps a channel name (the data term, lowercased, with spaces
        replaced by underscores) to ``(uris, terms)``: ``uris`` holds the data
        URI followed by the URI of every format, and ``terms`` is the
        comma-separated first word of every lowercased format term.
        None when no entry matches the tool name.
    """

    def _collect_channels(funct: dict, channel_type: str) -> DictWithListAndStr:
        """Collect URIs and format terms of the 'input' or 'output' elements of one function."""
        channels: DictWithListAndStr = {}
        for element in funct.get(channel_type, []):
            if "data" not in element:
                continue
            element_name = element["data"]["term"].lower().replace(" ", "_")
            formats = element.get("format", [])
            # The data URI comes first, followed by the EDAM URI of every format
            uris = [element["data"]["uri"], *(fmt["uri"] for fmt in formats)]
            # Keep only the first word of each term, in case of complicated strings. i.e. "FASTA format"
            terms = ",".join(fmt["term"].lower().split(" ")[0] for fmt in formats)
            channels[element_name] = (uris, terms)
        return channels

    wanted = tool_name.lower()
    # Iterate through the tools in the response to find the tool name.
    # Guard against a missing response before indexing.
    for tool in (data or {}).get("list", []):
        if tool["name"].lower() != wanted:
            continue
        inputs: DictWithListAndStr = {}
        outputs: DictWithListAndStr = {}
        # Parse all tool functions; later functions override earlier ones on name clashes
        for funct in tool.get("function", []):
            inputs.update(_collect_channels(funct, "input"))
            outputs.update(_collect_channels(funct, "output"))
        return inputs, outputs

    # If the tool name was not found in the response
    log.warning(f"Could not find an EDAM ontology term for '{tool_name}'")
    return None
13 changes: 11 additions & 2 deletions nf_core/components/create.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
import nf_core
import nf_core.utils
from nf_core.components.components_command import ComponentCommand
from nf_core.components.components_utils import get_biotools_id
from nf_core.components.components_utils import get_biotools_id, get_biotools_response, get_channel_info_from_biotools
from nf_core.pipelines.lint_utils import run_prettier_on_file

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -151,8 +151,15 @@ def create(self) -> bool:
if self.component_type == "modules":
# Try to find a bioconda package for 'component'
self._get_bioconda_tool()
name = self.tool_conda_name if self.tool_conda_name else self.component
# Try to find a biotools entry for 'component'
self.tool_identifier = get_biotools_id(self.component)
biotools_data = get_biotools_response(name)
if biotools_data:
self.tool_identifier = get_biotools_id(biotools_data, name)
# Obtain EDAM ontologies for inputs and outputs
channel_info = get_channel_info_from_biotools(biotools_data, name)
if channel_info:
self.inputs, self.outputs = channel_info

# Prompt for GitHub username
self._get_username()
Expand All @@ -176,6 +183,8 @@ def create(self) -> bool:

new_files = [str(path) for path in self.file_paths.values()]

run_prettier_on_file(new_files)

log.info("Created following files:\n " + "\n ".join(new_files))
return True

Expand Down
45 changes: 44 additions & 1 deletion nf_core/module-template/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,26 @@ tools:
## TODO nf-core: Add a description of all of the variables used as input
{% endif -%}
input:
{% if inputs -%}
{% for input_name, ontologies in inputs.items() -%}
{% if has_meta %}
- - meta:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'sample1', single_end:false ]`
{% endif %}
- {{ input_name }}:
# TODO nf-core: Update the information obtained from bio.tools and make sure that it is correct
type: file
description: {{ input_name }} file
pattern: {{ "\"*.{" + ontologies[1] + "}\"" }}
ontologies:
{% for ontology in ontologies[0] -%}
- edam: "{{ ontology }}"
{% endfor -%}
{% endfor -%}
{% else -%}
#{% if has_meta %} Only when we have meta
- - meta:
type: map
Expand All @@ -45,14 +65,36 @@ input:
- edam: "http://edamontology.org/format_25722"
- edam: "http://edamontology.org/format_2573"
- edam: "http://edamontology.org/format_3462"
{% else %}
{% else -%}
- edam: ""
{%- endif %}
{%- endif %}

{% if not_empty_template -%}
## TODO nf-core: Add a description of all of the variables used as output
{% endif -%}
output:
{% if outputs -%}
{% for output_name, ontologies in outputs.items() -%}
- {{ output_name }}:
{% if has_meta -%}
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'sample1', single_end:false ]`
{%- endif %}
- {{ output_name }}:
# TODO nf-core: Update the information obtained from bio.tools and make sure that it is correct
type: file
description: {{ output_name }} file
pattern: {{ "\"*.{" + ontologies[1] + "}\"" }}
ontologies:
{% for ontology in ontologies[0] -%}
- edam: "{{ ontology }}"
{% endfor -%}
{% endfor -%}
{% else -%}
- {{ 'bam:' if not_empty_template else "output:" }}
#{% if has_meta -%} Only when we have meta
- meta:
Expand All @@ -76,6 +118,7 @@ output:
{% else -%}
- edam: ""
{%- endif %}
{%- endif %}
- versions:
- "versions.yml":
type: file
Expand Down
7 changes: 3 additions & 4 deletions nf_core/modules/lint/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import nf_core.components.nfcore_component
import nf_core.modules.modules_utils
import nf_core.utils
from nf_core.components.components_utils import get_biotools_id
from nf_core.components.components_utils import get_biotools_id, get_biotools_response
from nf_core.components.lint import ComponentLint, LintExceptionError, LintResult
from nf_core.components.nfcore_component import NFCoreComponent
from nf_core.pipelines.lint_utils import console, run_prettier_on_file
Expand Down Expand Up @@ -362,9 +362,8 @@ def update_meta_yml_file(self, mod):
for i, tool in enumerate(corrected_meta_yml["tools"]):
tool_name = list(tool.keys())[0]
if "identifier" not in tool[tool_name]:
corrected_meta_yml["tools"][i][tool_name]["identifier"] = get_biotools_id(
mod.component_name if "/" not in mod.component_name else mod.component_name.split("/")[0]
)
biotools_data = get_biotools_response(tool_name)
corrected_meta_yml["tools"][i][tool_name]["identifier"] = get_biotools_id(biotools_data, tool_name)

with open(mod.meta_yml, "w") as fh:
log.info(f"Updating {mod.meta_yml}")
Expand Down
Empty file added tests/components/__init__.py
Empty file.
143 changes: 0 additions & 143 deletions tests/components/generate_snapshot.py

This file was deleted.

Loading
Loading