Updated with partial match and abbreviation match

neurobagel · Jul 22, 2024 · de45965 · de45965
1 parent fc882cf
commit de45965
Show file tree

Hide file tree

Showing 8 changed files with 475 additions and 26 deletions.
diff --git a/app/fetch_termURLs/abbreviations/__init__.py b/app/fetch_termURLs/abbreviations/__init__.py
@@ -0,0 +1,6 @@
+import os
+import sys
+
+# Append the directory containing this file to the import search path.
+# The original author added this to fix a relative import error.
+sys.path.append(os.path.dirname(os.path.realpath(__file__)))
diff --git a/app/fetch_termURLs/abbreviations/abbreviations_assessment.py b/app/fetch_termURLs/abbreviations/abbreviations_assessment.py
@@ -0,0 +1,91 @@
+# Abbreviation data for assessment terms. Each entry pairs a "label" with the
+# list of "abbreviations" recorded for that label.
+# NOTE(review): the variable name and entry shape match what
+# generate_abbreviations() in get_abbreviations.py writes, so this list looks
+# machine-generated - confirm before hand-editing.
+# NOTE(review): "BIS" is listed under two different labels below, so a lookup
+# by abbreviation alone cannot tell those two labels apart.
+abbreviations_assessment = [
+    {
+        "label": "behavioral approach/inhibition systems",
+        "abbreviations": ["BIS"],
+    },
+    {"label": "Barratt Impulsiveness Scale", "abbreviations": ["BIS"]},
+    {"label": "DOSPERT", "abbreviations": ["DO", "DOTS", "DOS"]},
+    {
+        "label": "Positive and Negative Affect Scale",
+        "abbreviations": ["PANAS"],
+    },
+    {"label": "balloon analogue risk task", "abbreviations": ["BART"]},
+    {"label": "stop signal task", "abbreviations": ["SST", "SSTT"]},
+    {"label": "breath-holding", "abbreviations": ["BH"]},
+    {"label": "paired associate learning", "abbreviations": ["PAL", "PALA"]},
+    {"label": "paired associate recall", "abbreviations": ["PAIR"]},
+    {
+        "label": "spatial working memory task",
+        "abbreviations": ["SWM", "SWMT", "N-back"],
+    },
+    {
+        "label": "Wechsler Memory Scale Fourth Edition",
+        "abbreviations": ["WMS-IV"],
+    },
+    {"label": "letter number sequencing", "abbreviations": ["LNS", "LNSS"]},
+    {
+        "label": "Wechsler Adult Intelligence Scale - Revised",
+        "abbreviations": ["WAIS-R"],
+    },
+    {
+        "label": "n-back task",
+        "abbreviations": ["NVT", "N-Task", "NTB", "n-task"],
+    },
+    {
+        "label": "Scale for the Assessment of Negative Symptoms",
+        "abbreviations": ["SANS"],
+    },
+    {
+        "label": "Scale for the Assessment of Positive Symptoms",
+        "abbreviations": ["SAPS"],
+    },
+    {"label": "big five questionnaire", "abbreviations": ["NEO-FFI", "PAQ"]},
+    {"label": "Mini Mental State Examination", "abbreviations": ["MMSE"]},
+    {"label": "PEBL Perceptual Vigilance Task", "abbreviations": ["PVT"]},
+    {
+        "label": "Raven's Advanced Progressive Matrices",
+        "abbreviations": ["RAPM"],
+    },
+    {"label": "Kaufman Brief Intelligence Test", "abbreviations": ["KBIT"]},
+    {"label": "theory of mind task", "abbreviations": ["ToM", "TOMS"]},
+    {"label": "Stroop task", "abbreviations": ["ST"]},
+    {
+        "label": "Comprehensive Test of Phonological Processing",
+        "abbreviations": ["CTOPP"],
+    },
+    {"label": "Peabody Picture Vocabulary Test", "abbreviations": ["PPVT"]},
+    {
+        "label": "Wechsler Abbreviated Scale of Intelligence",
+        "abbreviations": ["WASI"],
+    },
+    {"label": "multisource interference task", "abbreviations": ["MIST"]},
+    {
+        "label": "Center for Epidemiologic Studies Depression Scale",
+        "abbreviations": ["CES-D"],
+    },
+    {"label": "Kirby Delay Discounting Task", "abbreviations": ["KD", "KDDT"]},
+    {
+        "label": "UPPS-P Impulsivity Scale",
+        "abbreviations": ["UPISS", "UPPS-PIS"],
+    },
+    {"label": "Zuckerman Sensation Seeking Scale", "abbreviations": ["SSS"]},
+    {"label": "Eriksen flanker task", "abbreviations": ["EF"]},
+    {"label": "Continuous Performance Task", "abbreviations": ["CPT", "CPCT"]},
+    {"label": "Emotion Regulation Questionnaire", "abbreviations": ["ERQ"]},
+    {"label": "Motor Screening Task", "abbreviations": ["MST"]},
+    {"label": "Reaction Time", "abbreviations": ["RT", "RTT"]},
+    {
+        "label": "Rapid Visual Information Processing",
+        "abbreviations": ["RVIP", "RVP"],
+    },
+    {"label": "Stockings of Cambridge Task", "abbreviations": ["SCAT"]},
+    {
+        "label": "Edinburgh Handedness Inventory",
+        "abbreviations": ["EHI", "EHII"],
+    },
+    {"label": "National Adult Reading Test", "abbreviations": ["NART", "NAT"]},
+    {
+        "label": "Structured Clinical Interview for Diagnostic and Statistical Manual of Mental Disorders (DSM-IV)",
+        "abbreviations": ["SCID"],
+    },
+]
diff --git a/app/fetch_termURLs/abbreviations/abbreviations_diagnosis.py b/app/fetch_termURLs/abbreviations/abbreviations_diagnosis.py
@@ -0,0 +1,36 @@
+# Abbreviation data for diagnosis terms. Each entry pairs a "label" with the
+# list of "abbreviations" recorded for that label.
+# NOTE(review): the variable name and entry shape match what
+# generate_abbreviations() in get_abbreviations.py writes, so this list looks
+# machine-generated - confirm before hand-editing.
+# NOTE(review): several abbreviations ("SAD", "MDD", "MD", "DD", "BD", "PD",
+# "AS") appear under more than one label below, so a lookup by abbreviation
+# alone is ambiguous for them.
+abbreviations_diagnosis = [
+    {"label": "Schizophrenia", "abbreviations": ["SZ", "SP", "SZD"]},
+    {"label": "Bipolar disorder", "abbreviations": ["BD", "BPI"]},
+    {
+        "label": "Attention deficit hyperactivity disorder",
+        "abbreviations": ["ADHD", "ADHD-PI", "ADHD-C"],
+    },
+    {"label": "Separation anxiety", "abbreviations": ["SAD", "SAS"]},
+    {"label": "Generalized anxiety disorder", "abbreviations": ["GAD", "GAN"]},
+    {"label": "Social phobia", "abbreviations": ["SAD", "SPS", "SPH"]},
+    {"label": "Major depressive disorder", "abbreviations": ["MDD"]},
+    {
+        "label": "Autism spectrum disorder",
+        "abbreviations": ["ASD", "AS", "ASDs"],
+    },
+    {"label": "Overweight", "abbreviations": ["OW", "BW"]},
+    {"label": "Parkinson's disease", "abbreviations": ["PD", "PDx", "PDs"]},
+    {"label": "Traumatic brain injury", "abbreviations": ["TBI", "TBI-SCI"]},
+    {"label": "Obsessive-compulsive disorder", "abbreviations": ["OCD"]},
+    {"label": "Fibromyalgia", "abbreviations": ["FM"]},
+    {"label": "Visual impairment", "abbreviations": ["VI", "VIa", "VA"]},
+    {"label": "Depressive disorder", "abbreviations": ["MDD", "DD"]},
+    {"label": "Dysthymia", "abbreviations": ["DYSM", "DD"]},
+    {"label": "Mood disorder", "abbreviations": ["MDD", "MD"]},
+    {"label": "Dyslexia", "abbreviations": ["ID", "IDR", "DLX", "dys."]},
+    {"label": "Specific spelling disorder", "abbreviations": ["SSD"]},
+    {
+        "label": "Mental disorder",
+        "abbreviations": ["MD", "MDD", "BD", "PD", "MPD"],
+    },
+    {"label": "Anxiety disorder", "abbreviations": ["AD"]},
+    {"label": "Concussion injury of brain", "abbreviations": ["CI"]},
+    {"label": "Moderate depression", "abbreviations": ["MDD", "MODD"]},
+    {"label": "Mild depression", "abbreviations": ["MD"]},
+    {"label": "Aphasia", "abbreviations": ["AS", "AP", "Aphas."]},
+]
diff --git a/app/fetch_termURLs/abbreviations/abbreviations_image_modality.py b/app/fetch_termURLs/abbreviations/abbreviations_image_modality.py
@@ -0,0 +1,8 @@
+# Abbreviation data for image modality terms. Each entry pairs a "label" with
+# the list of "abbreviations" recorded for that label.
+abbreviations_image_modality = [
+    {"label": "Arterial Spin Labeling", "abbreviations": ["ASL"]},
+    {"label": "Diffusion Weighted", "abbreviations": ["DWI", "DW"]},
+    # "ECG" was removed here: it abbreviates electrocardiogram (a heart
+    # recording), not electroencephalogram, so it produced wrong matches.
+    {"label": "Electroencephalogram", "abbreviations": ["EEG"]},
+    {"label": "Flow Weighted", "abbreviations": ["FW"]},
+    {"label": "T1 Weighted", "abbreviations": ["T1W", "T1WI"]},
+    # NOTE(review): "TW" does not say T1 or T2; kept as-is, but confirm it
+    # should map to T2 only.
+    {"label": "T2 Weighted", "abbreviations": ["TW", "T2W"]},
+]
diff --git a/app/fetch_termURLs/abbreviations/abbreviations_sex.py b/app/fetch_termURLs/abbreviations/abbreviations_sex.py
@@ -0,0 +1,5 @@
+# Abbreviation data for sex terms. Each entry pairs a "label" with the list
+# of "abbreviations" recorded for that label.
+# NOTE(review): "WF" under "female" looks unusual - verify it is intended.
+abbreviations_sex = [
+    {"label": "male", "abbreviations": ["MALE", "M", "m"]},
+    {"label": "female", "abbreviations": ["WF", "F", "Fem.", "Female", "F."]},
+    {"label": "other", "abbreviations": ["OTH", "Oth", "OT"]},
+]
diff --git a/app/fetch_termURLs/abbreviations/get_abbreviations.py b/app/fetch_termURLs/abbreviations/get_abbreviations.py
@@ -0,0 +1,185 @@
+import argparse
+import json
+import re
+import os
+import requests
+from typing import List, Dict
+from langchain_community.chat_models import ChatOllama
+from langchain_core.prompts import PromptTemplate
+from app.fetch_termURLs.termURL_mappings import (
+    sex_mapping,
+    diagnosis_url,
+    assessment_url,
+    image_modality_mapping,
+)
+
+
+def _fetch_labels(url: str, vocab_key: str, timeout: float) -> List[str]:
+    """Download the JSON at ``url`` and return the labels under ``vocab_key``."""
+    # Without a timeout, requests waits indefinitely on an unresponsive host.
+    response = requests.get(url, timeout=timeout)
+    if response.status_code != 200:
+        raise RuntimeError(
+            f"Failed to fetch data from {url} with status code {response.status_code}"
+        )
+
+    data = response.json()
+    labels = []
+    # "or []" also covers a key that is present but null.
+    for term in data.get(vocab_key) or []:
+        label = term.get("Label")
+        # Skip missing, non-string and whitespace-only labels so callers
+        # never receive empty strings.
+        if isinstance(label, str) and label.strip():
+            labels.append(label.strip())
+    return labels
+
+
+def fetch_diagnosis_labels(url: str, timeout: float = 30.0) -> List[str]:
+    """
+    Fetches diagnosis terms from the given API URL.
+
+    Args:
+        url (str): The API URL to fetch terms from.
+        timeout (float): Seconds to wait for the server before giving up.
+
+    Returns:
+        List[str]: The stripped, non-empty "Label" values found under the
+        "nb:Diagnosis" key of the JSON response.
+
+    Raises:
+        RuntimeError: If the response status code is not 200.
+    """
+    return _fetch_labels(url, "nb:Diagnosis", timeout)
+
+
+def fetch_assessment_labels(url: str, timeout: float = 30.0) -> List[str]:
+    """
+    Fetches assessment terms from the given API URL.
+
+    Args:
+        url (str): The API URL to fetch terms from.
+        timeout (float): Seconds to wait for the server before giving up.
+
+    Returns:
+        List[str]: The stripped, non-empty "Label" values found under the
+        "nb:Assessment" key of the JSON response.
+
+    Raises:
+        RuntimeError: If the response status code is not 200.
+    """
+    return _fetch_labels(url, "nb:Assessment", timeout)
+
+
+def fetch_sex_labels(mapping: Dict[str, str]) -> List[str]:
+    """
+    Collects the sex labels defined by the hardcoded mapping.
+
+    Args:
+        mapping (Dict[str, str]): Mapping whose keys are the sex labels.
+
+    Returns:
+        List[str]: The keys of ``mapping``, in the mapping's iteration order.
+    """
+    labels = [label for label in mapping]
+    return labels
+
+
+def fetch_image_modality_labels(mapping: List[Dict[str, str]]) -> List[str]:
+    """
+    Collects the image modality labels from the hardcoded mapping.
+
+    Args:
+        mapping (List[Dict[str, str]]): Entries that each carry a "label" key.
+
+    Returns:
+        List[str]: The "label" value of every entry, in input order.
+    """
+    labels: List[str] = []
+    for entry in mapping:
+        labels.append(entry["label"])
+    return labels
+
+
+def _parse_abbreviations(content: str) -> List[str]:
+    """Split a comma-separated model reply into clean abbreviation strings."""
+    cleaned = []
+    for piece in content.split(","):
+        abbr = piece.strip()
+        # The prompt's example ends with a quoted item followed by a period
+        # ('DM1'.); drop that period so the closing quote can be stripped.
+        if abbr.endswith(("'.", '".')):
+            abbr = abbr[:-1]
+        # The prompt's example quotes each abbreviation, so the model may
+        # echo the quotes back; they are not part of the abbreviation.
+        abbr = abbr.strip("'\" \t")
+        if abbr:
+            cleaned.append(abbr)
+    return cleaned
+
+
+def generate_abbreviations(
+    input_terms: List[str], output_file: str, model: str = "llama3"
+) -> None:
+    """
+    Generates abbreviations for a list of terms using ChatOllama model and saves them in a Python file.
+
+    The file is written next to this script and contains one assignment:
+    a variable named after ``output_file`` (without extension) bound to a
+    list of ``{"label": ..., "abbreviations": [...]}`` entries.
+
+    Args:
+        input_terms (List[str]): List of terms for which abbreviations are generated.
+            Empty terms are skipped.
+        output_file (str): Output Python file to save abbreviations.
+        model (str): Name of the Ollama model to query.
+    """
+    llm = ChatOllama(model=model)
+    prompt = PromptTemplate(
+        template="""Please respond with abbreviations most commonly used for the
+following term: {term}.
+Give only the abbreviations as output and prefer the ones used in research
+data and papers.
+For example:
+Input:'diabetes mellitus'
+Output:'DM', 'DM2', 'DM1'.
+Do Not Give any explanation in the output.
+Input: "{term}"
+Output= <abbreviations>
+    """,
+        input_variables=["term"],
+    )
+    chain = prompt | llm
+
+    abbreviations_list = []
+    for term in input_terms:
+        if not term:
+            continue
+
+        response = chain.invoke({"term": term})
+        # Read the reply text from the message itself. Matching a regex
+        # against str(response) breaks whenever the text contains an
+        # apostrophe, because the repr then switches to double quotes.
+        content = getattr(response, "content", response)
+        if not isinstance(content, str):
+            content = str(content)
+
+        abbreviations_list.append(
+            {"label": term, "abbreviations": _parse_abbreviations(content)}
+        )
+
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    output_path = os.path.join(script_dir, output_file)
+    # NOTE(review): the variable name is taken from the file name, so a
+    # prefix that is not a valid identifier yields an unimportable module.
+    variable_name = os.path.splitext(os.path.basename(output_file))[0]
+
+    with open(output_path, "w", encoding="utf-8") as py_file:
+        py_file.write(
+            f"{variable_name} = {json.dumps(abbreviations_list, indent=4)}\n"
+        )
+
+    print(f"Abbreviations saved to {output_path}")
+
+
+def main(output_file_prefix: str) -> None:
+    """
+    Fetch the labels for each term category and generate abbreviations for them.
+
+    The categories are handled in order: diagnosis, assessment, sex, image
+    modality. Each one is written to its own Python file named
+    ``<output_file_prefix>_<category>.py``.
+
+    Args:
+        output_file_prefix (str): Prefix for output Python files.
+    """
+    # (file-name suffix, zero-argument loader) pairs. Each loader runs only
+    # when its turn comes, so the fetch/generate order is unchanged.
+    jobs = (
+        ("diagnosis", lambda: fetch_diagnosis_labels(diagnosis_url)),
+        ("assessment", lambda: fetch_assessment_labels(assessment_url)),
+        ("sex", lambda: fetch_sex_labels(sex_mapping)),
+        (
+            "image_modality",
+            lambda: fetch_image_modality_labels(image_modality_mapping),
+        ),
+    )
+    for suffix, load_terms in jobs:
+        generate_abbreviations(
+            load_terms(), f"{output_file_prefix}_{suffix}.py"
+        )
+
+
+if __name__ == "__main__":
+    # Command-line entry point; the only option is the output file prefix.
+    cli = argparse.ArgumentParser(description="Process terms.")
+    cli.add_argument(
+        "--output-prefix",
+        default="abbreviations",
+        type=str,
+        help="The prefix for output Python files to save abbreviations.",
+    )
+    main(cli.parse_args().output_prefix)