-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Updated with partial match and abbreviation match
- Loading branch information
Showing
8 changed files
with
475 additions
and
26 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
import os | ||
import sys | ||
|
||
# Append the directory containing this file to the module search path.
# Original note: this fixes a relative import error.
_PACKAGE_DIR = os.path.dirname(os.path.realpath(__file__))
sys.path.append(_PACKAGE_DIR)
91 changes: 91 additions & 0 deletions
91
app/fetch_termURLs/abbreviations/abbreviations_assessment.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
# Assessment labels paired with the abbreviations recorded for each of them.
# Kept as (label, abbreviations) pairs so every entry fits on a single row;
# the public list of dicts is derived from these pairs just below.
_ASSESSMENT_PAIRS = (
    ("behavioral approach/inhibition systems", ["BIS"]),
    ("Barratt Impulsiveness Scale", ["BIS"]),
    ("DOSPERT", ["DO", "DOTS", "DOS"]),
    ("Positive and Negative Affect Scale", ["PANAS"]),
    ("balloon analogue risk task", ["BART"]),
    ("stop signal task", ["SST", "SSTT"]),
    ("breath-holding", ["BH"]),
    ("paired associate learning", ["PAL", "PALA"]),
    ("paired associate recall", ["PAIR"]),
    ("spatial working memory task", ["SWM", "SWMT", "N-back"]),
    ("Wechsler Memory Scale Fourth Edition", ["WMS-IV"]),
    ("letter number sequencing", ["LNS", "LNSS"]),
    ("Wechsler Adult Intelligence Scale - Revised", ["WAIS-R"]),
    ("n-back task", ["NVT", "N-Task", "NTB", "n-task"]),
    ("Scale for the Assessment of Negative Symptoms", ["SANS"]),
    ("Scale for the Assessment of Positive Symptoms", ["SAPS"]),
    ("big five questionnaire", ["NEO-FFI", "PAQ"]),
    ("Mini Mental State Examination", ["MMSE"]),
    ("PEBL Perceptual Vigilance Task", ["PVT"]),
    ("Raven's Advanced Progressive Matrices", ["RAPM"]),
    ("Kaufman Brief Intelligence Test", ["KBIT"]),
    ("theory of mind task", ["ToM", "TOMS"]),
    ("Stroop task", ["ST"]),
    ("Comprehensive Test of Phonological Processing", ["CTOPP"]),
    ("Peabody Picture Vocabulary Test", ["PPVT"]),
    ("Wechsler Abbreviated Scale of Intelligence", ["WASI"]),
    ("multisource interference task", ["MIST"]),
    ("Center for Epidemiologic Studies Depression Scale", ["CES-D"]),
    ("Kirby Delay Discounting Task", ["KD", "KDDT"]),
    ("UPPS-P Impulsivity Scale", ["UPISS", "UPPS-PIS"]),
    ("Zuckerman Sensation Seeking Scale", ["SSS"]),
    ("Eriksen flanker task", ["EF"]),
    ("Continuous Performance Task", ["CPT", "CPCT"]),
    ("Emotion Regulation Questionnaire", ["ERQ"]),
    ("Motor Screening Task", ["MST"]),
    ("Reaction Time", ["RT", "RTT"]),
    ("Rapid Visual Information Processing", ["RVIP", "RVP"]),
    ("Stockings of Cambridge Task", ["SCAT"]),
    ("Edinburgh Handedness Inventory", ["EHI", "EHII"]),
    ("National Adult Reading Test", ["NART", "NAT"]),
    (
        "Structured Clinical Interview for Diagnostic and Statistical Manual of Mental Disorders (DSM-IV)",
        ["SCID"],
    ),
)

# Public lookup table: one {"label", "abbreviations"} dict per assessment,
# in the same order as the pairs above.
abbreviations_assessment = [
    {"label": label, "abbreviations": abbreviations}
    for label, abbreviations in _ASSESSMENT_PAIRS
]
36 changes: 36 additions & 0 deletions
36
app/fetch_termURLs/abbreviations/abbreviations_diagnosis.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
# Diagnosis labels paired with the abbreviations recorded for each of them.
# Several abbreviations (e.g. "MDD", "SAD", "MD") appear under more than one
# label, so an abbreviation does not identify a single diagnosis here.
_DIAGNOSIS_PAIRS = (
    ("Schizophrenia", ["SZ", "SP", "SZD"]),
    ("Bipolar disorder", ["BD", "BPI"]),
    ("Attention deficit hyperactivity disorder", ["ADHD", "ADHD-PI", "ADHD-C"]),
    ("Separation anxiety", ["SAD", "SAS"]),
    ("Generalized anxiety disorder", ["GAD", "GAN"]),
    ("Social phobia", ["SAD", "SPS", "SPH"]),
    ("Major depressive disorder", ["MDD"]),
    ("Autism spectrum disorder", ["ASD", "AS", "ASDs"]),
    ("Overweight", ["OW", "BW"]),
    ("Parkinson's disease", ["PD", "PDx", "PDs"]),
    ("Traumatic brain injury", ["TBI", "TBI-SCI"]),
    ("Obsessive-compulsive disorder", ["OCD"]),
    ("Fibromyalgia", ["FM"]),
    ("Visual impairment", ["VI", "VIa", "VA"]),
    ("Depressive disorder", ["MDD", "DD"]),
    ("Dysthymia", ["DYSM", "DD"]),
    ("Mood disorder", ["MDD", "MD"]),
    ("Dyslexia", ["ID", "IDR", "DLX", "dys."]),
    ("Specific spelling disorder", ["SSD"]),
    ("Mental disorder", ["MD", "MDD", "BD", "PD", "MPD"]),
    ("Anxiety disorder", ["AD"]),
    ("Concussion injury of brain", ["CI"]),
    ("Moderate depression", ["MDD", "MODD"]),
    ("Mild depression", ["MD"]),
    ("Aphasia", ["AS", "AP", "Aphas."]),
)

# Public lookup table: one {"label", "abbreviations"} dict per diagnosis,
# in the same order as the pairs above.
abbreviations_diagnosis = [
    {"label": label, "abbreviations": abbreviations}
    for label, abbreviations in _DIAGNOSIS_PAIRS
]
8 changes: 8 additions & 0 deletions
8
app/fetch_termURLs/abbreviations/abbreviations_image_modality.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# Image modality labels and the abbreviations recorded for each of them.
abbreviations_image_modality = [
    {"label": "Arterial Spin Labeling", "abbreviations": ["ASL"]},
    {"label": "Diffusion Weighted", "abbreviations": ["DWI", "DW"]},
    # "ECG" is intentionally not listed here: it abbreviates electrocardiogram
    # (a heart recording), so treating it as an alias would mislabel ECG data
    # as Electroencephalogram.
    {"label": "Electroencephalogram", "abbreviations": ["EEG"]},
    {"label": "Flow Weighted", "abbreviations": ["FW"]},
    {"label": "T1 Weighted", "abbreviations": ["T1W", "T1WI"]},
    {"label": "T2 Weighted", "abbreviations": ["TW", "T2W"]},
]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# Sex labels and the abbreviations recorded for each of them.
abbreviations_sex = [
    {
        "label": "male",
        "abbreviations": ["MALE", "M", "m"],
    },
    {
        "label": "female",
        "abbreviations": ["WF", "F", "Fem.", "Female", "F."],
    },
    {
        "label": "other",
        "abbreviations": ["OTH", "Oth", "OT"],
    },
]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,185 @@ | ||
import argparse | ||
import json | ||
import re | ||
import os | ||
import requests | ||
from typing import List, Dict | ||
from langchain_community.chat_models import ChatOllama | ||
from langchain_core.prompts import PromptTemplate | ||
from app.fetch_termURLs.termURL_mappings import ( | ||
sex_mapping, | ||
diagnosis_url, | ||
assessment_url, | ||
image_modality_mapping, | ||
) | ||
|
||
|
||
def _fetch_labels(url: str, vocabulary_key: str, timeout: float) -> List[str]:
    """
    Downloads a vocabulary from the API and returns its cleaned term labels.
    Args:
        url (str): The API URL to fetch terms from.
        vocabulary_key (str): Top-level key of the JSON response holding the terms.
        timeout (float): Seconds to wait for the server before giving up.
    Returns:
        List[str]: Stripped, non-empty labels in the order returned by the API.
    Raises:
        RuntimeError: If the response status code is not 200.
    """
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        raise RuntimeError(
            f"Failed to fetch data from {url} with status code {response.status_code}"
        )

    labels = []
    for term in response.json().get(vocabulary_key, []):
        label = term.get("Label")
        # Skip missing, non-string and whitespace-only labels so callers never
        # receive an empty string.
        if isinstance(label, str) and label.strip():
            labels.append(label.strip())
    return labels


def fetch_diagnosis_labels(url: str, timeout: float = 30.0) -> List[str]:
    """
    Fetches diagnosis terms from the given API URL.
    Args:
        url (str): The API URL to fetch terms from.
        timeout (float): Seconds to wait for the server. Defaults to 30.
    Returns:
        List[str]: A list of diagnosis terms.
    Raises:
        RuntimeError: If the response status code is not 200.
    """
    return _fetch_labels(url, "nb:Diagnosis", timeout)


def fetch_assessment_labels(url: str, timeout: float = 30.0) -> List[str]:
    """
    Fetches assessment terms from the given API URL.
    Args:
        url (str): The API URL to fetch terms from.
        timeout (float): Seconds to wait for the server. Defaults to 30.
    Returns:
        List[str]: A list of assessment terms.
    Raises:
        RuntimeError: If the response status code is not 200.
    """
    return _fetch_labels(url, "nb:Assessment", timeout)
|
||
|
||
def fetch_sex_labels(mapping: Dict[str, str]) -> List[str]:
    """
    Collects the sex labels, i.e. the keys of the hardcoded mapping.
    Args:
        mapping (Dict[str, str]): The hardcoded mapping for sex terms.
    Returns:
        List[str]: The mapping's keys, in the mapping's own order.
    """
    return [label for label in mapping.keys()]
|
||
|
||
def fetch_image_modality_labels(mapping: List[Dict[str, str]]) -> List[str]:
    """
    Collects the "label" value of every entry in the hardcoded mapping.
    Args:
        mapping (List[Dict[str, str]]): The hardcoded mapping for image modality terms.
    Returns:
        List[str]: The image modality labels, in the order of the mapping.
    """
    labels: List[str] = []
    for entry in mapping:
        # Every entry must carry a "label" key; a missing key raises KeyError.
        labels.append(entry["label"])
    return labels
|
||
|
||
def _message_text(response) -> str:
    """Returns the text of a model reply, read from its ``content`` attribute."""
    # Reading the attribute avoids parsing the reply's repr, whose quoting
    # changes depending on which quote characters the text contains.
    content = getattr(response, "content", response)
    return content if isinstance(content, str) else str(content)


def _parse_abbreviations(content: str) -> List[str]:
    """Splits raw model output into a list of clean, non-empty abbreviations."""
    abbreviations = []
    for piece in re.split(r"[,\n]", content):
        token = piece.strip()
        # A period right after a closing quote ends the sentence; it is not
        # part of the abbreviation (unlike the period in e.g. "Fem.").
        if token.endswith(("'.", '".')):
            token = token[:-1]
        token = token.strip("'\"").strip()
        if token:
            abbreviations.append(token)
    return abbreviations


def generate_abbreviations(input_terms: List[str], output_file: str) -> None:
    """
    Generates abbreviations for a list of terms using ChatOllama model and saves them in a Python file.

    The file is written next to this script and contains a single assignment
    whose variable name is the output file name without its extension.
    Args:
        input_terms (List[str]): List of terms for which abbreviations are generated.
            Empty terms are skipped.
        output_file (str): Output Python file to save abbreviations.
    Raises:
        ValueError: If the output file name (without extension) is not a valid
            Python identifier, since the generated file could not be parsed.
    """
    # Validate before any model call so a bad name fails fast.
    variable_name = os.path.splitext(os.path.basename(output_file))[0]
    if not variable_name.isidentifier():
        raise ValueError(
            f"Cannot derive a valid Python variable name from {output_file!r}"
        )

    llm = ChatOllama(model="llama3")
    prompt = PromptTemplate(
        template="""Please respond with abbreviations most commonly used for the
following term: {term}.
Give only the abbreviations as output and prefer the ones used in research
data and papers.
For example:
Input:'diabetes mellitus'
Output:'DM', 'DM2', 'DM1'.
Do Not Give any explanation in the output.
Input: "{term}"
Output= <abbreviations>
""",
        input_variables=["term"],
    )
    chain = prompt | llm

    abbreviations_list = []
    for term in input_terms:
        if not term:
            continue

        response = chain.invoke({"term": term})
        abbreviations = _parse_abbreviations(_message_text(response))
        abbreviations_list.append({"label": term, "abbreviations": abbreviations})

    script_dir = os.path.dirname(os.path.abspath(__file__))
    output_path = os.path.join(script_dir, output_file)

    # Explicit encoding keeps the generated file independent of the platform default.
    with open(output_path, "w", encoding="utf-8") as py_file:
        py_file.write(
            f"{variable_name} = {json.dumps(abbreviations_list, indent=4)}\n"
        )

    print(f"Abbreviations saved to {output_path}")
|
||
|
||
def main(output_file_prefix: str) -> None:
    """
    Fetches the labels of each term category and generates their abbreviations.

    Categories are processed in order: diagnosis, assessment, sex, image
    modality. Each one is saved to its own Python file named
    "<output_file_prefix>_<category>.py".
    Args:
        output_file_prefix (str): Prefix for output Python files.
    """
    # Each job pairs a file-name suffix with a zero-argument loader, so the
    # labels of a category are only fetched right before they are processed.
    jobs = (
        ("diagnosis", lambda: fetch_diagnosis_labels(diagnosis_url)),
        ("assessment", lambda: fetch_assessment_labels(assessment_url)),
        ("sex", lambda: fetch_sex_labels(sex_mapping)),
        (
            "image_modality",
            lambda: fetch_image_modality_labels(image_modality_mapping),
        ),
    )

    for suffix, load_terms in jobs:
        generate_abbreviations(
            load_terms(), f"{output_file_prefix}_{suffix}.py"
        )
|
||
|
||
if __name__ == "__main__":
    # Command-line entry point: read the optional output prefix and run main.
    cli = argparse.ArgumentParser(description="Process terms.")
    cli.add_argument(
        "--output-prefix",
        default="abbreviations",
        type=str,
        help="The prefix for output Python files to save abbreviations.",
    )
    main(cli.parse_args().output_prefix)
Oops, something went wrong.