Skip to content

Commit

Permalink
Updated with partial match and abbreviation match
Browse files Browse the repository at this point in the history
  • Loading branch information
Raya679 committed Jul 22, 2024
1 parent fc882cf commit de45965
Show file tree
Hide file tree
Showing 8 changed files with 475 additions and 26 deletions.
6 changes: 6 additions & 0 deletions app/fetch_termURLs/abbreviations/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import os
import sys

# Put this package's own directory on the module search path so the modules
# that live next to this file can be imported (fixes relative import error).
sys.path.append(os.path.split(os.path.realpath(__file__))[0])
91 changes: 91 additions & 0 deletions app/fetch_termURLs/abbreviations/abbreviations_assessment.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# Abbreviations for assessment terms, exposed as a list of
# {"label": <term>, "abbreviations": [<abbreviation>, ...]} records in the
# order given below.
# NOTE(review): abbreviations are not unique across labels ("BIS" is listed for
# two different terms), and "N-back" is listed under "spatial working memory
# task" although "n-back task" has its own record — confirm both are intended.
abbreviations_assessment = [
    {"label": label, "abbreviations": abbreviations}
    for label, abbreviations in (
        ("behavioral approach/inhibition systems", ["BIS"]),
        ("Barratt Impulsiveness Scale", ["BIS"]),
        ("DOSPERT", ["DO", "DOTS", "DOS"]),
        ("Positive and Negative Affect Scale", ["PANAS"]),
        ("balloon analogue risk task", ["BART"]),
        ("stop signal task", ["SST", "SSTT"]),
        ("breath-holding", ["BH"]),
        ("paired associate learning", ["PAL", "PALA"]),
        ("paired associate recall", ["PAIR"]),
        ("spatial working memory task", ["SWM", "SWMT", "N-back"]),
        ("Wechsler Memory Scale Fourth Edition", ["WMS-IV"]),
        ("letter number sequencing", ["LNS", "LNSS"]),
        ("Wechsler Adult Intelligence Scale - Revised", ["WAIS-R"]),
        ("n-back task", ["NVT", "N-Task", "NTB", "n-task"]),
        ("Scale for the Assessment of Negative Symptoms", ["SANS"]),
        ("Scale for the Assessment of Positive Symptoms", ["SAPS"]),
        ("big five questionnaire", ["NEO-FFI", "PAQ"]),
        ("Mini Mental State Examination", ["MMSE"]),
        ("PEBL Perceptual Vigilance Task", ["PVT"]),
        ("Raven's Advanced Progressive Matrices", ["RAPM"]),
        ("Kaufman Brief Intelligence Test", ["KBIT"]),
        ("theory of mind task", ["ToM", "TOMS"]),
        ("Stroop task", ["ST"]),
        ("Comprehensive Test of Phonological Processing", ["CTOPP"]),
        ("Peabody Picture Vocabulary Test", ["PPVT"]),
        ("Wechsler Abbreviated Scale of Intelligence", ["WASI"]),
        ("multisource interference task", ["MIST"]),
        ("Center for Epidemiologic Studies Depression Scale", ["CES-D"]),
        ("Kirby Delay Discounting Task", ["KD", "KDDT"]),
        ("UPPS-P Impulsivity Scale", ["UPISS", "UPPS-PIS"]),
        ("Zuckerman Sensation Seeking Scale", ["SSS"]),
        ("Eriksen flanker task", ["EF"]),
        ("Continuous Performance Task", ["CPT", "CPCT"]),
        ("Emotion Regulation Questionnaire", ["ERQ"]),
        ("Motor Screening Task", ["MST"]),
        ("Reaction Time", ["RT", "RTT"]),
        ("Rapid Visual Information Processing", ["RVIP", "RVP"]),
        ("Stockings of Cambridge Task", ["SCAT"]),
        ("Edinburgh Handedness Inventory", ["EHI", "EHII"]),
        ("National Adult Reading Test", ["NART", "NAT"]),
        (
            "Structured Clinical Interview for Diagnostic and Statistical Manual of Mental Disorders (DSM-IV)",
            ["SCID"],
        ),
    )
]
36 changes: 36 additions & 0 deletions app/fetch_termURLs/abbreviations/abbreviations_diagnosis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Abbreviations for diagnosis terms, exposed as a list of
# {"label": <term>, "abbreviations": [<abbreviation>, ...]} records in the
# order given below.
# NOTE(review): several abbreviations appear under more than one label
# ("SAD", "MDD", "DD", "MD", "BD", "PD", "AS"), so looking a label up by
# abbreviation is ambiguous for those values — confirm how consumers resolve it.
abbreviations_diagnosis = [
    {"label": label, "abbreviations": abbreviations}
    for label, abbreviations in (
        ("Schizophrenia", ["SZ", "SP", "SZD"]),
        ("Bipolar disorder", ["BD", "BPI"]),
        (
            "Attention deficit hyperactivity disorder",
            ["ADHD", "ADHD-PI", "ADHD-C"],
        ),
        ("Separation anxiety", ["SAD", "SAS"]),
        ("Generalized anxiety disorder", ["GAD", "GAN"]),
        ("Social phobia", ["SAD", "SPS", "SPH"]),
        ("Major depressive disorder", ["MDD"]),
        ("Autism spectrum disorder", ["ASD", "AS", "ASDs"]),
        ("Overweight", ["OW", "BW"]),
        ("Parkinson's disease", ["PD", "PDx", "PDs"]),
        ("Traumatic brain injury", ["TBI", "TBI-SCI"]),
        ("Obsessive-compulsive disorder", ["OCD"]),
        ("Fibromyalgia", ["FM"]),
        ("Visual impairment", ["VI", "VIa", "VA"]),
        ("Depressive disorder", ["MDD", "DD"]),
        ("Dysthymia", ["DYSM", "DD"]),
        ("Mood disorder", ["MDD", "MD"]),
        ("Dyslexia", ["ID", "IDR", "DLX", "dys."]),
        ("Specific spelling disorder", ["SSD"]),
        ("Mental disorder", ["MD", "MDD", "BD", "PD", "MPD"]),
        ("Anxiety disorder", ["AD"]),
        ("Concussion injury of brain", ["CI"]),
        ("Moderate depression", ["MDD", "MODD"]),
        ("Mild depression", ["MD"]),
        ("Aphasia", ["AS", "AP", "Aphas."]),
    )
]
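8 changes: 8 additions & 0 deletions app/fetch_termURLs/abbreviations/abbreviations_image_modality.py
Original file line number Diff line number Diff line change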
@@ -0,0 +1,8 @@
# Abbreviations for image modality terms, exposed as a list of
# {"label": <term>, "abbreviations": [<abbreviation>, ...]} records.
abbreviations_image_modality = [
    {"label": "Arterial Spin Labeling", "abbreviations": ["ASL"]},
    {"label": "Diffusion Weighted", "abbreviations": ["DWI", "DW"]},
    # "ECG" is deliberately not listed: it abbreviates electrocardiogram, a
    # different recording, so accepting it would map ECG inputs to EEG.
    {"label": "Electroencephalogram", "abbreviations": ["EEG"]},
    {"label": "Flow Weighted", "abbreviations": ["FW"]},
    {"label": "T1 Weighted", "abbreviations": ["T1W", "T1WI"]},
    # NOTE(review): "TW" does not say which weighting (T1 or T2) is meant —
    # confirm that it should resolve to T2.
    {"label": "T2 Weighted", "abbreviations": ["TW", "T2W"]},
]
5 changes: 5 additions & 0 deletions app/fetch_termURLs/abbreviations/abbreviations_sex.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Abbreviations for sex terms, exposed as a list of
# {"label": <term>, "abbreviations": [<abbreviation>, ...]} records.
abbreviations_sex = [
    {"label": label, "abbreviations": abbreviations}
    for label, abbreviations in (
        ("male", ["MALE", "M", "m"]),
        ("female", ["WF", "F", "Fem.", "Female", "F."]),
        ("other", ["OTH", "Oth", "OT"]),
    )
]
185 changes: 185 additions & 0 deletions app/fetch_termURLs/abbreviations/get_abbreviations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
import argparse
import json
import re
import os
import requests
from typing import List, Dict
from langchain_community.chat_models import ChatOllama
from langchain_core.prompts import PromptTemplate
from app.fetch_termURLs.termURL_mappings import (
sex_mapping,
diagnosis_url,
assessment_url,
image_modality_mapping,
)


def _fetch_labels(url: str, collection_key: str, timeout: float) -> List[str]:
    """
    Fetches the term labels stored under one key of a JSON API response.
    Args:
        url (str): The API URL to fetch terms from.
        collection_key (str): Top-level key of the JSON payload holding the terms.
        timeout (float): Seconds to wait for the server before giving up.
    Returns:
        List[str]: The stripped, non-empty "Label" values of the terms.
    Raises:
        Exception: If the response status code is not 200.
    """
    # Without a timeout a stalled server would block the caller forever.
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        raise Exception(
            f"Failed to fetch data from {url} with status code {response.status_code}"
        )

    labels = []
    for term in response.json().get(collection_key, []):
        # Strip before filtering so whitespace-only labels are dropped rather
        # than returned as empty strings.
        label = (term.get("Label") or "").strip()
        if label:
            labels.append(label)
    return labels


def fetch_diagnosis_labels(url: str, timeout: float = 30.0) -> List[str]:
    """
    Fetches diagnosis terms from the given API URL.
    Args:
        url (str): The API URL to fetch terms from.
        timeout (float): Seconds to wait for the server before giving up.
    Returns:
        List[str]: A list of diagnosis terms.
    Raises:
        Exception: If the response status code is not 200.
    """
    return _fetch_labels(url, "nb:Diagnosis", timeout)


def fetch_assessment_labels(url: str, timeout: float = 30.0) -> List[str]:
    """
    Fetches assessment terms from the given API URL.
    Args:
        url (str): The API URL to fetch terms from.
        timeout (float): Seconds to wait for the server before giving up.
    Returns:
        List[str]: A list of assessment terms.
    Raises:
        Exception: If the response status code is not 200.
    """
    return _fetch_labels(url, "nb:Assessment", timeout)


def fetch_sex_labels(mapping: Dict[str, str]) -> List[str]:
    """
    Lists the sex labels defined by the hardcoded mapping.
    Args:
        mapping (Dict[str, str]): The hardcoded mapping whose keys are the sex labels.
    Returns:
        List[str]: The mapping's keys, in the mapping's own order.
    """
    # Iterating a dict yields its keys.
    return list(mapping)


def fetch_image_modality_labels(mapping: List[Dict[str, str]]) -> List[str]:
    """
    Lists the image modality labels defined by the hardcoded mapping.
    Args:
        mapping (List[Dict[str, str]]): The hardcoded image modality entries,
            each of which must have a "label" key.
    Returns:
        List[str]: The "label" value of every entry, in input order.
    """
    labels = []
    for entry in mapping:
        labels.append(entry["label"])
    return labels


def _parse_abbreviations(content: str) -> List[str]:
    """
    Splits a comma-separated model reply into clean abbreviation strings.
    Args:
        content (str): The text of the model reply.
    Returns:
        List[str]: The non-empty items with surrounding whitespace and quotes removed.
    """
    abbreviations = []
    for piece in content.split(","):
        piece = piece.strip()
        # The prompt's own example closes the quoted list with a full stop
        # ('DM1'.); drop that terminator, but keep dots that belong to an
        # unquoted abbreviation such as "dys.".
        if piece.endswith(("'.", '".')):
            piece = piece[:-1]
        piece = piece.strip("'\"").strip()
        if piece:
            abbreviations.append(piece)
    return abbreviations


def generate_abbreviations(input_terms: List[str], output_file: str) -> None:
    """
    Generates abbreviations for a list of terms using ChatOllama model and saves them in a Python file.
    The file is written next to this script and assigns the list of
    {"label": ..., "abbreviations": [...]} records to a variable named after
    the output file (without its extension).
    Args:
        input_terms (List[str]): List of terms for which abbreviations are generated.
            Empty terms are skipped.
        output_file (str): Output Python file to save abbreviations.
    """
    llm = ChatOllama(model="llama3")
    prompt = PromptTemplate(
        template="""Please respond with abbreviations most commonly used for the
following term: {term}.
Give only the abbreviations as output and prefer the ones used in research
data and papers.
For example:
Input:'diabetes mellitus'
Output:'DM', 'DM2', 'DM1'.
Do Not Give any explanation in the output.
Input: "{term}"
Output= <abbreviations>
""",
        input_variables=["term"],
    )
    chain = prompt | llm

    abbreviations_list = []
    for term in input_terms:
        if not term:
            continue

        response = chain.invoke({"term": term})
        # Read the reply text from the message itself. Scraping it out of
        # str(response) with a regex breaks as soon as the reply contains a
        # quote, which the prompt's own example output does.
        content = getattr(response, "content", response)
        if not isinstance(content, str):
            content = str(content)

        abbreviations_list.append(
            {"label": term, "abbreviations": _parse_abbreviations(content)}
        )

    script_dir = os.path.dirname(os.path.abspath(__file__))
    output_path = os.path.join(script_dir, output_file)
    variable_name = os.path.splitext(os.path.basename(output_file))[0]

    with open(output_path, "w", encoding="utf-8") as py_file:
        py_file.write(
            f"{variable_name} = {json.dumps(abbreviations_list, indent=4)}\n"
        )

    print(f"Abbreviations saved to {output_path}")


def main(output_file_prefix: str) -> None:
    """
    Fetches the diagnosis, assessment, sex, and image modality labels and generates abbreviations for each.
    Each category is loaded and written to its own Python file, one after the
    other, in that order.
    Args:
        output_file_prefix (str): Prefix for output Python files.
    """
    # Each loader is wrapped in a lambda so a category's labels are only
    # fetched right before its file is generated.
    jobs = (
        (
            f"{output_file_prefix}_diagnosis.py",
            lambda: fetch_diagnosis_labels(diagnosis_url),
        ),
        (
            f"{output_file_prefix}_assessment.py",
            lambda: fetch_assessment_labels(assessment_url),
        ),
        (
            f"{output_file_prefix}_sex.py",
            lambda: fetch_sex_labels(sex_mapping),
        ),
        (
            f"{output_file_prefix}_image_modality.py",
            lambda: fetch_image_modality_labels(image_modality_mapping),
        ),
    )

    for output_file, load_terms in jobs:
        generate_abbreviations(load_terms(), output_file)


if __name__ == "__main__":
    # Command-line entry point: the only option is the prefix of the
    # generated files.
    cli = argparse.ArgumentParser(description="Process terms.")
    cli.add_argument(
        "--output-prefix",
        type=str,
        default="abbreviations",
        help="The prefix for output Python files to save abbreviations.",
    )
    main(cli.parse_args().output_prefix)
Loading

0 comments on commit de45965

Please sign in to comment.