Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Properly ingest ko terms and COG pathways #1459

Merged
merged 2 commits into from
Nov 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 17 additions & 13 deletions nmdc_server/ingest/kegg.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ def ingest_ko_search(db: Session) -> None:
def get_search_records_from_delimeted_file(
file,
term_key,
text_key,
records,
text_key=None,
delimeter="\t",
fallback_text_key=None,
fieldnames=None,
Expand All @@ -75,8 +75,10 @@ def get_search_records_from_delimeted_file(
continue
if fallback_text_key:
records[row[term_key]] = row[text_key] or row[fallback_text_key]
else:
elif text_key:
records[row[term_key]] = row[text_key]
else:
records[row[term_key]] = ""
except FileNotFoundError:
errors["kegg_search"].add(f"Missing {file}")

Expand All @@ -100,7 +102,7 @@ def get_search_records_from_delimeted_file(

cog_function_headers = ["function_code", "sequence", "definition"]

delimeted_files: Dict[str, List[Dict[str, Union[str, List[str]]]]] = {
delimeted_files: Dict[str, List[Dict[str, Union[None, str, List[str]]]]] = {
PATHWAY_FILE: [
{
"term_key": "image_id",
Expand All @@ -117,22 +119,24 @@ def get_search_records_from_delimeted_file(
"hierarchy": "cog",
}
],
# Cog pathways and terms come out of the same file
COG_PATHWAY_DEFS: [
# Pathways
{
"fieldnames": cog_def_headers,
"term_key": cog_def_headers[4],
"text_key": cog_def_headers[4],
"text_key": None, # COG pathways just have a name
"hierarchy": "cog",
}
],
COG_TERM_DEFS: [
},
# Terms
{
"fieldnames": cog_def_headers,
"term_key": cog_def_headers[0],
"text_key": cog_def_headers[2],
"hierarchy": "cog",
}
},
],
# PFAM terms and clans come out of the same file
PFAM_TERM_DEFS: [
{
"fieldnames": pfam_headers,
Expand All @@ -157,28 +161,28 @@ def get_search_records():
"cog": {},
}

def ingest_tree(node: dict) -> None:
def ingest_tree(node: dict, hierarchy: str) -> None:
if not node.get("children", False):
term, *text = node["name"].split(" ", maxsplit=1)
if "BR:" not in term:
# Skip over BRITE term hierarchies that have no children
records[term] = text[0] if text else ""
records[hierarchy][term] = text[0] if text else ""

for child in node.get("children", ()):
ingest_tree(child)
ingest_tree(child, hierarchy)

for url in [MODULE_URL, ORTHOLOGY_URL]:
req = requests.get(url)
req.raise_for_status()
ingest_tree(req.json())
ingest_tree(req.json(), "ko")

for file, keys in delimeted_files.items():
for key_set in keys:
get_search_records_from_delimeted_file(
file,
key_set["term_key"],
key_set["text_key"],
records[str(key_set["hierarchy"])],
text_key=key_set["text_key"],
fallback_text_key=key_set.get("fallback_text_key", None),
fieldnames=key_set.get("fieldnames", None),
)
Expand Down
9 changes: 8 additions & 1 deletion web/src/components/FilterGene.vue
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,16 @@ export default defineComponent({
return request(() => props.geneTypeParams.searchFunction(search.value || ''));
}

/**
 * Build the label shown for a gene-term search result.
 *
 * When `text` is truthy the label is "<term>: <text>"; otherwise (for
 * example, an empty string) the bare term is returned so that no dangling
 * ": " separator is displayed.
 */
function getTermDisplayText(term: string, text: string) {
  return text ? `${term}: ${text}` : term;
}

watch(search, async () => {
const resp = (await geneSearch())
.map((v: KeggTermSearchResponse) => ({ text: `${v.term}: ${v.text}`, value: v.term }));
.map((v: KeggTermSearchResponse) => ({ text: getTermDisplayText(v.term, v.text), value: v.term }));
if (resp.length === 0 && search.value && props.geneTypeParams.searchWithInputText(search.value)) {
resp.push({ value: search.value, text: search.value });
}
Expand Down