From 4b22985c32858ecaf39b1b9cbec614fa33d25819 Mon Sep 17 00:00:00 2001
From: rmanaem
Date: Thu, 28 Sep 2023 10:43:28 -0400
Subject: [PATCH 1/7] Dumped the app directory

---
 app/__init__.py             |   0
 app/api/__init__.py         |   0
 app/api/crud.py             | 195 ++++++++++++++++++++++++++++++
 app/api/models.py           |  62 ++++++++++
 app/api/routers/__init__.py |   0
 app/api/routers/query.py    |  36 ++++++
 app/api/utility.py          | 234 ++++++++++++++++++++++++++++++++++++
 app/main.py                 |  54 +++++++++
 8 files changed, 581 insertions(+)
 create mode 100644 app/__init__.py
 create mode 100644 app/api/__init__.py
 create mode 100644 app/api/crud.py
 create mode 100644 app/api/models.py
 create mode 100644 app/api/routers/__init__.py
 create mode 100644 app/api/routers/query.py
 create mode 100644 app/api/utility.py
 create mode 100644 app/main.py

diff --git a/app/__init__.py b/app/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/app/api/__init__.py b/app/api/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/app/api/crud.py b/app/api/crud.py
new file mode 100644
index 0000000..2b88b6d
--- /dev/null
+++ b/app/api/crud.py
@@ -0,0 +1,195 @@
+"""CRUD functions called by path operations."""
+
+import os
+
+import httpx
+import pandas as pd
+from fastapi import HTTPException, status
+
+from . import utility as util
+from .models import CohortQueryResponse
+
+# Order that dataset and subject-level attributes should appear in the API JSON response.
+# This order is defined explicitly because when graph-returned results are transformed to a dataframe,
+# the default order of columns may be different than the order that variables are given in the SPARQL SELECT statement.
+ATTRIBUTES_ORDER = [
+    "sub_id",
+    "num_sessions",
+    "session_id",
+    "session_file_path",
+    "age",
+    "sex",
+    "diagnosis",
+    "subject_group",
+    "assessment",
+    "image_modal",
+    "dataset_name",
+    "dataset_uuid",
+    "dataset_portal_uri",
+]
+
+
+async def get(
+    min_age: float,
+    max_age: float,
+    sex: str,
+    diagnosis: str,
+    is_control: bool,
+    min_num_sessions: int,
+    assessment: str,
+    image_modal: str,
+):
+    """
+    Makes a POST request to the Stardog API using httpx, where the payload is a SPARQL query generated by the create_query function.
+
+    Parameters
+    ----------
+    min_age : float
+        Minimum age of subject.
+    max_age : float
+        Maximum age of subject.
+    sex : str
+        Sex of subject.
+    diagnosis : str
+        Subject diagnosis.
+    is_control : bool
+        Whether or not subject is a control.
+    min_num_sessions : int
+        Subject minimum number of imaging sessions.
+    assessment : str
+        Non-imaging assessment completed by subjects.
+    image_modal : str
+        Imaging modality of subject scans.
+
+    Returns
+    -------
+    list
+        List of CohortQueryResponse objects, one per dataset matching the query.
+
+    """
+    try:
+        response = httpx.post(
+            url=util.QUERY_URL,
+            content=util.create_query(
+                return_agg=util.RETURN_AGG.val,
+                age=(min_age, max_age),
+                sex=sex,
+                diagnosis=diagnosis,
+                is_control=is_control,
+                min_num_sessions=min_num_sessions,
+                assessment=assessment,
+                image_modal=image_modal,
+            ),
+            headers=util.QUERY_HEADER,
+            auth=httpx.BasicAuth(
+                os.environ.get(util.GRAPH_USERNAME.name),
+                os.environ.get(util.GRAPH_PASSWORD.name),
+            ),
+            # TODO: Revisit timeout value when query performance is improved
+            timeout=30.0,
+        )
+    except httpx.ConnectTimeout as exc:
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail="Timed out while connecting to the server. Please confirm that you are connected to the McGill network and try again.",
+        ) from exc
+
+    if not response.is_success:
+        raise HTTPException(
+            status_code=response.status_code,
+            detail=f"{response.reason_phrase}: {response.text}",
+        )
+
+    results = response.json()
+
+    results_dicts = [
+        {k: v["value"] for k, v in res.items()}
+        for res in results["results"]["bindings"]
+    ]
+    results_df = pd.DataFrame(results_dicts).reindex(columns=ATTRIBUTES_ORDER)
+
+    response_obj = []
+    dataset_cols = ["dataset_uuid", "dataset_name"]
+    if not results_df.empty:
+        for (dataset_uuid, dataset_name), group in results_df.groupby(
+            by=dataset_cols
+        ):
+            if util.RETURN_AGG.val:
+                subject_data = list(group["session_file_path"].dropna())
+            else:
+                subject_data = (
+                    group.drop(dataset_cols, axis=1)
+                    .groupby(by=["sub_id", "session_id"])
+                    .agg(
+                        {
+                            "sub_id": "first",
+                            "session_id": "first",
+                            "num_sessions": "first",
+                            "age": "first",
+                            "sex": "first",
+                            "diagnosis": lambda x: list(set(x)),
+                            "subject_group": "first",
+                            "assessment": lambda x: list(set(x)),
+                            "image_modal": lambda x: list(set(x)),
+                            "session_file_path": "first",
+                        }
+                    )
+                )
+                subject_data = list(subject_data.to_dict("records"))
+
+            response_obj.append(
+                CohortQueryResponse(
+                    dataset_uuid=dataset_uuid,
+                    dataset_name=dataset_name,
+                    dataset_portal_uri=group["dataset_portal_uri"].iloc[0]
+                    if group["dataset_portal_uri"].notna().all()
+                    else None,
+                    num_matching_subjects=group["sub_id"].nunique(),
+                    subject_data=subject_data,
+                    image_modals=list(group["image_modal"].unique()),
+                )
+            )
+
+    return response_obj
+
+
+async def get_terms(data_element_URI: str):
+    """
+    Makes a POST request to the Stardog API using httpx, where the payload is a SPARQL query generated by the create_terms_query function.
+
+    Parameters
+    ----------
+    data_element_URI : str
+        Controlled term of neurobagel class for which all the available terms should be retrieved.
+
+    Returns
+    -------
+    dict
+        Dictionary with a single key (the data element URI) mapping to the list of available term URLs.
+
+    """
+    response = httpx.post(
+        url=util.QUERY_URL,
+        content=util.create_terms_query(data_element_URI),
+        headers=util.QUERY_HEADER,
+        auth=httpx.BasicAuth(
+            os.environ.get(util.GRAPH_USERNAME.name),
+            os.environ.get(util.GRAPH_PASSWORD.name),
+        ),
+    )
+
+    if not response.is_success:
+        raise HTTPException(
+            status_code=response.status_code,
+            detail=f"{response.reason_phrase}: {response.text}",
+        )
+
+    results = response.json()
+
+    results_dict = {
+        data_element_URI: [
+            result["termURL"]["value"]
+            for result in results["results"]["bindings"]
+        ]
+    }
+
+    return results_dict
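Note on the results handling above: each SPARQL binding is a dict of {variable: {"type": ..., "value": ...}} objects, so the nested comprehension keeps only the "value" of every bound variable before building the dataframe. A small self-contained sketch of that transformation (the payload below is a made-up example of the standard SPARQL JSON results format, not real graph output):

    import pandas as pd

    # Hypothetical response body in the SPARQL JSON results format.
    results = {
        "results": {
            "bindings": [
                {
                    "sub_id": {"type": "literal", "value": "sub-01"},
                    "age": {"type": "literal", "value": "24.0"},
                    "dataset_name": {"type": "literal", "value": "Example Dataset"},
                }
            ]
        }
    }

    results_dicts = [
        {k: v["value"] for k, v in res.items()}
        for res in results["results"]["bindings"]
    ]

    # Variables that were not bound for a row come back as NaN after reindexing,
    # which is why the explicit ATTRIBUTES_ORDER reindex above is safe.
    df = pd.DataFrame(results_dicts).reindex(
        columns=["sub_id", "age", "sex", "dataset_name"]
    )
    print(df)
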
+ """ + response = httpx.post( + url=util.QUERY_URL, + content=util.create_terms_query(data_element_URI), + headers=util.QUERY_HEADER, + auth=httpx.BasicAuth( + os.environ.get(util.GRAPH_USERNAME.name), + os.environ.get(util.GRAPH_PASSWORD.name), + ), + ) + + if not response.is_success: + raise HTTPException( + status_code=response.status_code, + detail=f"{response.reason_phrase}: {response.text}", + ) + + results = response.json() + + results_dict = { + data_element_URI: [ + result["termURL"]["value"] + for result in results["results"]["bindings"] + ] + } + + return results_dict diff --git a/app/api/models.py b/app/api/models.py new file mode 100644 index 0000000..f1a31ff --- /dev/null +++ b/app/api/models.py @@ -0,0 +1,62 @@ +"""Data models.""" + +from typing import Optional + +from fastapi import Query +from fastapi.exceptions import HTTPException +from pydantic import BaseModel, constr, root_validator + +CONTROLLED_TERM_REGEX = r"^[a-zA-Z]+[:]\S+$" + + +class QueryModel(BaseModel): + """Data model and dependency for API that stores the query parameters to be accepted and validated.""" + + min_age: float = Query(default=None, ge=0) + max_age: float = Query(default=None, ge=0) + sex: constr(regex=CONTROLLED_TERM_REGEX) = None + diagnosis: constr(regex=CONTROLLED_TERM_REGEX) = None + is_control: bool = None + min_num_sessions: int = Query(default=None, ge=1) + assessment: constr(regex=CONTROLLED_TERM_REGEX) = None + image_modal: constr(regex=CONTROLLED_TERM_REGEX) = None + + @root_validator() + def check_maxage_ge_minage(cls, values): + """ + If both age bounds have been set to values other than their defaults (None), ensure that max_age is >= min_age. + NOTE: HTTPException (and not ValueError) raised here to get around "Internal Server Error" raised by + FastAPI when a validation error comes from a Pydantic validator inside a class dependency. + See: + https://github.com/tiangolo/fastapi/issues/1474 + https://github.com/tiangolo/fastapi/discussions/3426 + https://fastapi.tiangolo.com/tutorial/handling-errors/?h=validation#requestvalidationerror-vs-validationerror + """ + mina, maxa = values["min_age"], values["max_age"] + if mina is not None and maxa is not None and (maxa < mina): + raise HTTPException( + status_code=422, + detail="'max_age' must be greater than or equal to 'min_age'", + ) + return values + + @root_validator + def check_exclusive_diagnosis_or_ctrl(cls, values): + if values["diagnosis"] is not None and values["is_control"]: + raise HTTPException( + status_code=422, + detail="Subjects cannot both be healthy controls and have a diagnosis.", + ) + return values + + +class CohortQueryResponse(BaseModel): + """Data model for query results for one matching dataset (i.e., a cohort).""" + + dataset_uuid: str + # dataset_file_path: str # TODO: Revisit this field once we have datasets without imaging info/sessions. + dataset_name: str + dataset_portal_uri: Optional[str] + num_matching_subjects: int + subject_data: list + image_modals: list diff --git a/app/api/routers/__init__.py b/app/api/routers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/api/routers/query.py b/app/api/routers/query.py new file mode 100644 index 0000000..9e7db97 --- /dev/null +++ b/app/api/routers/query.py @@ -0,0 +1,36 @@ +"""Router for query path operations.""" + +from typing import List + +from fastapi import APIRouter, Depends +from pydantic import constr + +from .. 
diff --git a/app/api/routers/__init__.py b/app/api/routers/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/app/api/routers/query.py b/app/api/routers/query.py
new file mode 100644
index 0000000..9e7db97
--- /dev/null
+++ b/app/api/routers/query.py
@@ -0,0 +1,36 @@
+"""Router for query path operations."""
+
+from typing import List
+
+from fastapi import APIRouter, Depends
+from pydantic import constr
+
+from .. import crud
+from ..models import CONTROLLED_TERM_REGEX, CohortQueryResponse, QueryModel
+
+router = APIRouter(prefix="/query", tags=["query"])
+
+
+@router.get("/", response_model=List[CohortQueryResponse])
+async def get_query(query: QueryModel = Depends(QueryModel)):
+    """When a GET request is sent, return list of dicts corresponding to subject-level metadata aggregated by dataset."""
+    response = await crud.get(
+        query.min_age,
+        query.max_age,
+        query.sex,
+        query.diagnosis,
+        query.is_control,
+        query.min_num_sessions,
+        query.assessment,
+        query.image_modal,
+    )
+
+    return response
+
+
+@router.get("/attributes/{data_element_URI}")
+async def get_terms(data_element_URI: constr(regex=CONTROLLED_TERM_REGEX)):
+    """When a GET request is sent, return a dict with the only key corresponding to the controlled term of a neurobagel class and the value corresponding to all the available terms."""
+    response = await crud.get_terms(data_element_URI)
+
+    return response
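With the router mounted at /query, all filtering happens through query parameters on a single GET endpoint. A hedged usage sketch (the host and port are assumptions for illustration):

    import httpx

    # Assumes a locally running instance of the API.
    response = httpx.get(
        "http://localhost:8000/query/",
        params={"min_age": 18, "max_age": 35, "min_num_sessions": 2},
    )
    response.raise_for_status()
    for cohort in response.json():
        print(cohort["dataset_name"], cohort["num_matching_subjects"])
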
diff --git a/app/api/utility.py b/app/api/utility.py
new file mode 100644
index 0000000..ba3ef1f
--- /dev/null
+++ b/app/api/utility.py
@@ -0,0 +1,234 @@
+"""Constants for Stardog graph connection and utility functions for writing the SPARQL query."""
+
+import os
+from collections import namedtuple
+from typing import Optional
+
+# Request constants
+EnvVar = namedtuple("EnvVar", ["name", "val"])
+
+ALLOWED_ORIGINS = EnvVar(
+    "NB_API_ALLOWED_ORIGINS", os.environ.get("NB_API_ALLOWED_ORIGINS", "")
+)
+
+GRAPH_USERNAME = EnvVar(
+    "NB_GRAPH_USERNAME", os.environ.get("NB_GRAPH_USERNAME")
+)
+GRAPH_PASSWORD = EnvVar(
+    "NB_GRAPH_PASSWORD", os.environ.get("NB_GRAPH_PASSWORD")
+)
+GRAPH_ADDRESS = EnvVar(
+    "NB_GRAPH_ADDRESS", os.environ.get("NB_GRAPH_ADDRESS", "206.12.99.17")
+)
+GRAPH_DB = EnvVar(
+    "NB_GRAPH_DB", os.environ.get("NB_GRAPH_DB", "test_data/query")
+)
+GRAPH_PORT = EnvVar("NB_GRAPH_PORT", os.environ.get("NB_GRAPH_PORT", 5820))
+# TODO: Environment variables can't be parsed as bool so this is a workaround but isn't ideal.
+# Another option is to switch this to a command-line argument, but that would require changing the
+# Dockerfile also since Uvicorn can't accept custom command-line args.
+RETURN_AGG = EnvVar(
+    "NB_RETURN_AGG", os.environ.get("NB_RETURN_AGG", "True").lower() == "true"
+)
+
+QUERY_URL = f"http://{GRAPH_ADDRESS.val}:{GRAPH_PORT.val}/{GRAPH_DB.val}"
+QUERY_HEADER = {
+    "Content-Type": "application/sparql-query",
+    "Accept": "application/sparql-results+json",
+}
+
+# SPARQL query context
+DEFAULT_CONTEXT = """
+PREFIX cogatlas: <https://www.cognitiveatlas.org/task/id/>
+PREFIX nb: <http://neurobagel.org/vocab/>
+PREFIX nbg: <http://neurobagel.org/graph/>
+PREFIX ncit: <http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#>
+PREFIX nidm: <http://purl.org/nidash/nidm#>
+PREFIX snomed: <http://purl.bioontology.org/ontology/SNOMEDCT/>
+"""
+
+# Store domains in named tuples
+Domain = namedtuple("Domain", ["var", "pred"])
+# Core domains
+AGE = Domain("age", "nb:hasAge")
+SEX = Domain("sex", "nb:hasSex")
+DIAGNOSIS = Domain("diagnosis", "nb:hasDiagnosis")
+IS_CONTROL = Domain("subject_group", "nb:isSubjectGroup")
+ASSESSMENT = Domain("assessment", "nb:hasAssessment")
+IMAGE_MODAL = Domain("image_modal", "nb:hasContrastType")
+PROJECT = Domain("project", "nb:hasSamples")
+
+
+CATEGORICAL_DOMAINS = [SEX, DIAGNOSIS, IMAGE_MODAL, ASSESSMENT]
+
+IS_CONTROL_TERM = "ncit:C94342"
+
+
+def parse_origins_as_list(allowed_origins: str) -> list:
+    """Returns user-defined allowed origins as a list."""
+    return list(allowed_origins.split(" "))
+
+
+def create_query(
+    return_agg: bool,
+    age: Optional[tuple] = (None, None),
+    sex: Optional[str] = None,
+    diagnosis: Optional[str] = None,
+    is_control: Optional[bool] = None,
+    min_num_sessions: Optional[int] = None,
+    assessment: Optional[str] = None,
+    image_modal: Optional[str] = None,
+) -> str:
+    """
+    Creates a SPARQL query using a query template and filters it using the input parameters.
+
+    Parameters
+    ----------
+    return_agg : bool
+        Whether to return only aggregate query results (and not subject-level attributes besides file paths).
+    age : tuple, optional
+        Minimum and maximum age of subject, by default (None, None).
+    sex : str, optional
+        Subject sex, by default None.
+    diagnosis : str, optional
+        Subject diagnosis, by default None.
+    is_control : bool, optional
+        Whether or not subject is a control, by default None.
+    min_num_sessions : int, optional
+        Subject minimum number of imaging sessions, by default None.
+    assessment : str, optional
+        Non-imaging assessment completed by subjects, by default None.
+    image_modal : str, optional
+        Imaging modality of subject scans, by default None.
+
+    Returns
+    -------
+    str
+        The SPARQL query.
+    """
+    subject_level_filters = ""
+
+    if age[0] is not None:
+        subject_level_filters += "\n" + f"FILTER (?{AGE.var} >= {age[0]})."
+    if age[1] is not None:
+        subject_level_filters += "\n" + f"FILTER (?{AGE.var} <= {age[1]})."
+
+    if sex is not None:
+        subject_level_filters += "\n" + f"FILTER (?{SEX.var} = {sex})."
+
+    if diagnosis is not None:
+        subject_level_filters += (
+            "\n" + f"FILTER (?{DIAGNOSIS.var} = {diagnosis})."
+        )
+
+    if is_control is not None:
+        if is_control:
+            subject_level_filters += (
+                "\n" + f"FILTER (?{IS_CONTROL.var} = {IS_CONTROL_TERM})."
+            )
+        else:
+            subject_level_filters += (
+                "\n" + f"FILTER (?{IS_CONTROL.var} != {IS_CONTROL_TERM})."
+            )
+
+    if min_num_sessions is not None:
+        subject_level_filters += (
+            "\n" + f"FILTER (?num_sessions >= {min_num_sessions})."
+        )
+
+    if assessment is not None:
+        subject_level_filters += (
+            "\n" + f"FILTER (?{ASSESSMENT.var} = {assessment})."
+        )
+
+    session_level_filters = ""
+
+    if image_modal is not None:
+        session_level_filters += (
+            "\n" + f"FILTER (?{IMAGE_MODAL.var} = {image_modal})."
+        )
+
+    query_string = f"""
+    SELECT DISTINCT ?dataset_uuid ?dataset_name ?dataset_portal_uri ?sub_id ?age ?sex
+    ?diagnosis ?subject_group ?num_sessions ?session_id ?assessment ?image_modal ?session_file_path
+    WHERE {{
+        ?dataset_uuid a nb:Dataset;
+            nb:hasLabel ?dataset_name;
+            nb:hasSamples ?subject.
+        ?subject a nb:Subject;
+            nb:hasLabel ?sub_id;
+            nb:hasSession ?session;
+            nb:hasSession/nb:hasAcquisition/nb:hasContrastType ?image_modal.
+        ?session nb:hasLabel ?session_id.
+        OPTIONAL {{
+            ?dataset_uuid nb:hasPortalURI ?dataset_portal_uri.
+        }}
+        OPTIONAL {{
+            ?session nb:hasFilePath ?session_file_path.
+        }}
+        OPTIONAL {{
+            ?subject nb:hasAge ?age.
+        }}
+        OPTIONAL {{
+            ?subject nb:hasSex ?sex.
+        }}
+        OPTIONAL {{
+            ?subject nb:hasDiagnosis ?diagnosis.
+        }}
+        OPTIONAL {{
+            ?subject nb:isSubjectGroup ?subject_group.
+        }}
+        OPTIONAL {{
+            ?subject nb:hasAssessment ?assessment.
+        }}
+        {{
+            SELECT ?subject (count(distinct ?session) as ?num_sessions)
+            WHERE {{
+                ?subject a nb:Subject;
+                    nb:hasSession ?session.
+                ?session nb:hasAcquisition/nb:hasContrastType ?image_modal.
+                {session_level_filters}
+            }} GROUP BY ?subject
+        }}
+        {subject_level_filters}
+    }}
+    """
+
+    # The query defined above will return all subject-level attributes from the graph. If RETURN_AGG variable has been set to true,
+    # wrap query in an aggregating statement so data returned from graph include only attributes needed for dataset-level aggregate metadata.
+    if return_agg:
+        query_string = f"""
+        SELECT ?dataset_uuid ?dataset_name ?dataset_portal_uri ?sub_id ?session_file_path ?image_modal WHERE {{\n
+        {query_string}
+        \n}} GROUP BY ?dataset_uuid ?dataset_name ?dataset_portal_uri ?sub_id ?session_file_path ?image_modal
+        """
+
+    return "\n".join([DEFAULT_CONTEXT, query_string])
+
+
+def create_terms_query(data_element_URI: str) -> str:
+    """
+    Creates a SPARQL query using a simple query template to retrieve term URLs for a given data element.
+
+    Parameters
+    ----------
+    data_element_URI : str
+        The URI of the data element for which to retrieve the URIs of all connected terms.
+
+    Returns
+    -------
+    str
+        The SPARQL query.
+
+    Examples
+    --------
+    create_terms_query("nb:Assessment")
+    """
+
+    query_string = f"""
+    SELECT DISTINCT ?termURL
+    WHERE {{
+        ?termURL a {data_element_URI}.
+    }}
+    """
+
+    return "\n".join([DEFAULT_CONTEXT, query_string])
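The generated SPARQL is easiest to understand by looking at what create_query emits for a small set of filters. A sketch, assuming the module is importable as app.api.utility:

    from app.api import utility as util  # assumes the app package is on the path

    query = util.create_query(
        return_agg=False,
        age=(18.0, None),
        min_num_sessions=2,
    )
    # The subject-level block of the printed query now contains:
    #   FILTER (?age >= 18.0).
    #   FILTER (?num_sessions >= 2).
    print(query)
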
diff --git a/app/main.py b/app/main.py
new file mode 100644
index 0000000..58643a4
--- /dev/null
+++ b/app/main.py
@@ -0,0 +1,54 @@
+"""Main app."""
+
+import os
+import warnings
+
+import uvicorn
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import ORJSONResponse
+
+from .api import utility as util
+from .api.routers import query
+
+app = FastAPI(default_response_class=ORJSONResponse)
+
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=util.parse_origins_as_list(util.ALLOWED_ORIGINS.val),
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+
+@app.on_event("startup")
+async def auth_check():
+    """Checks whether username and password environment variables are set."""
+    if (
+        # TODO: Check if this error is still raised when variables are empty strings
+        os.environ.get(util.GRAPH_USERNAME.name) is None
+        or os.environ.get(util.GRAPH_PASSWORD.name) is None
+    ):
+        raise RuntimeError(
+            f"The application was launched but could not find the {util.GRAPH_USERNAME.name} and / or {util.GRAPH_PASSWORD.name} environment variables."
+        )
+
+
+@app.on_event("startup")
+async def allowed_origins_check():
+    """Raises warning if allowed origins environment variable has not been set or is an empty string."""
+    if os.environ.get(util.ALLOWED_ORIGINS.name, "") == "":
+        warnings.warn(
+            f"The API was launched without providing any values for the {util.ALLOWED_ORIGINS.name} environment variable. "
+            "This means that the API will only be accessible from the same origin it is hosted from: https://developer.mozilla.org/en-US/docs/Web/Security/Same-origin_policy. "
+            f"If you want to access the API from tools hosted at other origins such as the Neurobagel query tool, explicitly set the value of {util.ALLOWED_ORIGINS.name} to the origin(s) of these tools (e.g. http://localhost:3000). "
+            "Multiple allowed origins should be separated with spaces in a single string enclosed in quotes. "
+        )
+
+
+app.include_router(query.router)
+
+# Automatically start uvicorn server on execution of main.py
+if __name__ == "__main__":
+    uvicorn.run("app.main:app", port=8000, reload=True)
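Because NB_API_ALLOWED_ORIGINS is read as one space-separated string, the helper in utility.py is what turns it into the list CORSMiddleware expects. A tiny sketch of that round trip (the origin URLs are placeholders):

    from app.api import utility as util  # assumes the app package is on the path

    print(util.parse_origins_as_list("http://localhost:3000 https://query.example.org"))
    # ['http://localhost:3000', 'https://query.example.org']
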
From 596336945f54c4204b8da020d40d5cc95317e630 Mon Sep 17 00:00:00 2001
From: Sebastian Urchs
Date: Thu, 28 Sep 2023 12:10:27 -0400
Subject: [PATCH 2/7] [ENH] Very inspired first federation prototype (#1)

* set up route and crud function for basic federation request

Still the same

* Yay, federation!

Co-authored-by: Alyssa Dai
Co-authored-by: rmanaem
---
 app/api/crud.py          | 154 ++++++-----------------------
 app/api/models.py        |  61 ++----------
 app/api/routers/query.py |  20 ++--
 app/api/utility.py       | 209 +++++++++------------------------------
 app/main.py              |  57 ++++-------
 5 files changed, 110 insertions(+), 391 deletions(-)

diff --git a/app/api/crud.py b/app/api/crud.py
index 2b88b6d..40dcaac 100644
--- a/app/api/crud.py
+++ b/app/api/crud.py
@@ -1,13 +1,9 @@
 """CRUD functions called by path operations."""
 
-import os
-
 import httpx
-import pandas as pd
-from fastapi import HTTPException, status
+from fastapi import HTTPException
 
 from . import utility as util
-from .models import CohortQueryResponse
 
 # Order that dataset and subject-level attributes should appear in the API JSON response.
 # This order is defined explicitly because when graph-returned results are transformed to a dataframe,
 # the default order of columns may be different than the order that variables are given in the SPARQL SELECT statement.
@@ -67,129 +63,41 @@ async def get(
         List of CohortQueryResponse objects, one per dataset matching the query.
 
     """
-    try:
-        response = httpx.post(
-            url=util.QUERY_URL,
-            content=util.create_query(
-                return_agg=util.RETURN_AGG.val,
-                age=(min_age, max_age),
-                sex=sex,
-                diagnosis=diagnosis,
-                is_control=is_control,
-                min_num_sessions=min_num_sessions,
-                assessment=assessment,
-                image_modal=image_modal,
-            ),
-            headers=util.QUERY_HEADER,
-            auth=httpx.BasicAuth(
-                os.environ.get(util.GRAPH_USERNAME.name),
-                os.environ.get(util.GRAPH_PASSWORD.name),
-            ),
+    cross_node_results = []
+    params = {}
+    if min_age:
+        params["min_age"] = min_age
+    if max_age:
+        params["max_age"] = max_age
+    if sex:
+        params["sex"] = sex
+    if diagnosis:
+        params["diagnosis"] = diagnosis
+    if is_control:
+        params["is_control"] = is_control
+    if min_num_sessions:
+        params["min_num_sessions"] = min_num_sessions
+    if assessment:
+        params["assessment"] = assessment
+    if image_modal:
+        params["image_modal"] = image_modal
+
+    for node_url in util.NEUROBAGEL_NODES:
+        response = httpx.get(
+            url=node_url,
+            params=params,
             # TODO: Revisit timeout value when query performance is improved
             timeout=30.0,
         )
-    except httpx.ConnectTimeout as exc:
-        raise HTTPException(
-            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail="Timed out while connecting to the server. Please confirm that you are connected to the McGill network and try again.",
-        ) from exc
-
-    if not response.is_success:
-        raise HTTPException(
-            status_code=response.status_code,
-            detail=f"{response.reason_phrase}: {response.text}",
-        )
-
-    results = response.json()
+        print("request was: ", response.request.url)
 
-    results_dicts = [
-        {k: v["value"] for k, v in res.items()}
-        for res in results["results"]["bindings"]
-    ]
-    results_df = pd.DataFrame(results_dicts).reindex(columns=ATTRIBUTES_ORDER)
-
-    response_obj = []
-    dataset_cols = ["dataset_uuid", "dataset_name"]
-    if not results_df.empty:
-        for (dataset_uuid, dataset_name), group in results_df.groupby(
-            by=dataset_cols
-        ):
-            if util.RETURN_AGG.val:
-                subject_data = list(group["session_file_path"].dropna())
-            else:
-                subject_data = (
-                    group.drop(dataset_cols, axis=1)
-                    .groupby(by=["sub_id", "session_id"])
-                    .agg(
-                        {
-                            "sub_id": "first",
-                            "session_id": "first",
-                            "num_sessions": "first",
-                            "age": "first",
-                            "sex": "first",
-                            "diagnosis": lambda x: list(set(x)),
-                            "subject_group": "first",
-                            "assessment": lambda x: list(set(x)),
-                            "image_modal": lambda x: list(set(x)),
-                            "session_file_path": "first",
-                        }
-                    )
-                )
-                subject_data = list(subject_data.to_dict("records"))
-
-            response_obj.append(
-                CohortQueryResponse(
-                    dataset_uuid=dataset_uuid,
-                    dataset_name=dataset_name,
-                    dataset_portal_uri=group["dataset_portal_uri"].iloc[0]
-                    if group["dataset_portal_uri"].notna().all()
-                    else None,
-                    num_matching_subjects=group["sub_id"].nunique(),
-                    subject_data=subject_data,
-                    image_modals=list(group["image_modal"].unique()),
-                )
+        if not response.is_success:
+            raise HTTPException(
+                status_code=response.status_code,
+                detail=f"{response.reason_phrase}: {response.text}",
             )
-
-    return response_obj
-
-
-async def get_terms(data_element_URI: str):
-    """
-    Makes a POST request to the Stardog API using httpx, where the payload is a SPARQL query generated by the create_terms_query function.
-
-    Parameters
-    ----------
-    data_element_URI : str
-        Controlled term of neurobagel class for which all the available terms should be retrieved.
-
-    Returns
-    -------
-    dict
-        Dictionary with a single key (the data element URI) mapping to the list of available term URLs.
-
-    """
-    response = httpx.post(
-        url=util.QUERY_URL,
-        content=util.create_terms_query(data_element_URI),
-        headers=util.QUERY_HEADER,
-        auth=httpx.BasicAuth(
-            os.environ.get(util.GRAPH_USERNAME.name),
-            os.environ.get(util.GRAPH_PASSWORD.name),
-        ),
-    )
-
-    if not response.is_success:
-        raise HTTPException(
-            status_code=response.status_code,
-            detail=f"{response.reason_phrase}: {response.text}",
-        )
-
-    results = response.json()
-
-    results_dict = {
-        data_element_URI: [
-            result["termURL"]["value"]
-            for result in results["results"]["bindings"]
-        ]
-    }
+        cross_node_results.append(response.json())
 
-    return results_dict
+    return cross_node_results
diff --git a/app/api/models.py b/app/api/models.py
index f1a31ff..99658cd 100644
--- a/app/api/models.py
+++ b/app/api/models.py
@@ -1,10 +1,5 @@
 """Data models."""
-
-from typing import Optional
-
-from fastapi import Query
-from fastapi.exceptions import HTTPException
-from pydantic import BaseModel, constr, root_validator
+from pydantic import BaseModel
 
 CONTROLLED_TERM_REGEX = r"^[a-zA-Z]+[:]\S+$"
 
@@ -12,51 +7,11 @@ class QueryModel(BaseModel):
     """Data model and dependency for API that stores the query parameters to be accepted and validated."""
 
-    min_age: float = Query(default=None, ge=0)
-    max_age: float = Query(default=None, ge=0)
-    sex: constr(regex=CONTROLLED_TERM_REGEX) = None
-    diagnosis: constr(regex=CONTROLLED_TERM_REGEX) = None
+    min_age: float = None
+    max_age: float = None
+    sex: str = None
+    diagnosis: str = None
     is_control: bool = None
-    min_num_sessions: int = Query(default=None, ge=1)
-    assessment: constr(regex=CONTROLLED_TERM_REGEX) = None
-    image_modal: constr(regex=CONTROLLED_TERM_REGEX) = None
-
-    @root_validator()
-    def check_maxage_ge_minage(cls, values):
-        """
-        If both age bounds have been set to values other than their defaults (None), ensure that max_age is >= min_age.
-
-        NOTE: HTTPException (and not ValueError) raised here to get around "Internal Server Error" raised by
-        FastAPI when a validation error comes from a Pydantic validator inside a class dependency.
-        See:
-        https://github.com/tiangolo/fastapi/issues/1474
-        https://github.com/tiangolo/fastapi/discussions/3426
-        https://fastapi.tiangolo.com/tutorial/handling-errors/?h=validation#requestvalidationerror-vs-validationerror
-        """
-        mina, maxa = values["min_age"], values["max_age"]
-        if mina is not None and maxa is not None and (maxa < mina):
-            raise HTTPException(
-                status_code=422,
-                detail="'max_age' must be greater than or equal to 'min_age'",
-            )
-        return values
-
-    @root_validator
-    def check_exclusive_diagnosis_or_ctrl(cls, values):
-        if values["diagnosis"] is not None and values["is_control"]:
-            raise HTTPException(
-                status_code=422,
-                detail="Subjects cannot both be healthy controls and have a diagnosis.",
-            )
-        return values
-
-
-class CohortQueryResponse(BaseModel):
-    """Data model for query results for one matching dataset (i.e., a cohort)."""
-
-    dataset_uuid: str
-    # dataset_file_path: str  # TODO: Revisit this field once we have datasets without imaging info/sessions.
-    dataset_name: str
-    dataset_portal_uri: Optional[str]
-    num_matching_subjects: int
-    subject_data: list
-    image_modals: list
+    min_num_sessions: int = None
+    assessment: str = None
+    image_modal: str = None
diff --git a/app/api/routers/query.py b/app/api/routers/query.py
index 9e7db97..abb826c 100644
--- a/app/api/routers/query.py
+++ b/app/api/routers/query.py
@@ -1,17 +1,17 @@
 """Router for query path operations."""
 
-from typing import List
-
 from fastapi import APIRouter, Depends
-from pydantic import constr
 
 from .. import crud
-from ..models import CONTROLLED_TERM_REGEX, CohortQueryResponse, QueryModel
+from ..models import QueryModel
+
+router = APIRouter(prefix="/federate", tags=["federation"])
 
-router = APIRouter(prefix="/query", tags=["query"])
+
+# TODO: update to change the logic once crud is modified
 
 
-@router.get("/", response_model=List[CohortQueryResponse])
+@router.get("/")
 async def get_query(query: QueryModel = Depends(QueryModel)):
     """When a GET request is sent, return list of dicts corresponding to subject-level metadata aggregated by dataset."""
     response = await crud.get(
@@ -26,11 +26,3 @@ async def get_query(query: QueryModel = Depends(QueryModel)):
     )
 
     return response
-
-
-@router.get("/attributes/{data_element_URI}")
-async def get_terms(data_element_URI: constr(regex=CONTROLLED_TERM_REGEX)):
-    """When a GET request is sent, return a dict with the only key corresponding to the controlled term of a neurobagel class and the value corresponding to all the available terms."""
-    response = await crud.get_terms(data_element_URI)
-
-    return response
diff --git a/app/api/utility.py b/app/api/utility.py
index ba3ef1f..f33f1c2 100644
--- a/app/api/utility.py
+++ b/app/api/utility.py
@@ -2,7 +2,12 @@
 
 import os
 from collections import namedtuple
-from typing import Optional
+
+# Neurobagel nodes
+NEUROBAGEL_NODES = [
+    "http://206.12.99.17:8888/query/",
+    "http://206.12.89.194:8000/query/",
+]
 
 # Request constants
 EnvVar = namedtuple("EnvVar", ["name", "val"])
@@ -69,166 +74,42 @@ def parse_origins_as_list(allowed_origins: str) -> list:
     return list(allowed_origins.split(" "))
 
 
-def create_query(
-    return_agg: bool,
-    age: Optional[tuple] = (None, None),
-    sex: Optional[str] = None,
-    diagnosis: Optional[str] = None,
-    is_control: Optional[bool] = None,
-    min_num_sessions: Optional[int] = None,
-    assessment: Optional[str] = None,
-    image_modal: Optional[str] = None,
-) -> str:
-    """
-    Creates a SPARQL query using a query template and filters it using the input parameters.
-
-    Parameters
-    ----------
-    return_agg : bool
-        Whether to return only aggregate query results (and not subject-level attributes besides file paths).
-    age : tuple, optional
-        Minimum and maximum age of subject, by default (None, None).
-    sex : str, optional
-        Subject sex, by default None.
-    diagnosis : str, optional
-        Subject diagnosis, by default None.
-    is_control : bool, optional
-        Whether or not subject is a control, by default None.
-    min_num_sessions : int, optional
-        Subject minimum number of imaging sessions, by default None.
-    assessment : str, optional
-        Non-imaging assessment completed by subjects, by default None.
-    image_modal : str, optional
-        Imaging modality of subject scans, by default None.
-
-    Returns
-    -------
-    str
-        The SPARQL query.
-    """
-    subject_level_filters = ""
-
-    if age[0] is not None:
-        subject_level_filters += "\n" + f"FILTER (?{AGE.var} >= {age[0]})."
-    if age[1] is not None:
-        subject_level_filters += "\n" + f"FILTER (?{AGE.var} <= {age[1]})."
-
-    if sex is not None:
-        subject_level_filters += "\n" + f"FILTER (?{SEX.var} = {sex})."
-
-    if diagnosis is not None:
-        subject_level_filters += (
-            "\n" + f"FILTER (?{DIAGNOSIS.var} = {diagnosis})."
-        )
-
-    if is_control is not None:
-        if is_control:
-            subject_level_filters += (
-                "\n" + f"FILTER (?{IS_CONTROL.var} = {IS_CONTROL_TERM})."
-            )
-        else:
-            subject_level_filters += (
-                "\n" + f"FILTER (?{IS_CONTROL.var} != {IS_CONTROL_TERM})."
-            )
-
-    if min_num_sessions is not None:
-        subject_level_filters += (
-            "\n" + f"FILTER (?num_sessions >= {min_num_sessions})."
-        )
-
-    if assessment is not None:
-        subject_level_filters += (
-            "\n" + f"FILTER (?{ASSESSMENT.var} = {assessment})."
-        )
-
-    session_level_filters = ""
-
-    if image_modal is not None:
-        session_level_filters += (
-            "\n" + f"FILTER (?{IMAGE_MODAL.var} = {image_modal})."
-        )
-
-    query_string = f"""
-    SELECT DISTINCT ?dataset_uuid ?dataset_name ?dataset_portal_uri ?sub_id ?age ?sex
-    ?diagnosis ?subject_group ?num_sessions ?session_id ?assessment ?image_modal ?session_file_path
-    WHERE {{
-        ?dataset_uuid a nb:Dataset;
-            nb:hasLabel ?dataset_name;
-            nb:hasSamples ?subject.
-        ?subject a nb:Subject;
-            nb:hasLabel ?sub_id;
-            nb:hasSession ?session;
-            nb:hasSession/nb:hasAcquisition/nb:hasContrastType ?image_modal.
-        ?session nb:hasLabel ?session_id.
-        OPTIONAL {{
-            ?dataset_uuid nb:hasPortalURI ?dataset_portal_uri.
-        }}
-        OPTIONAL {{
-            ?session nb:hasFilePath ?session_file_path.
-        }}
-        OPTIONAL {{
-            ?subject nb:hasAge ?age.
-        }}
-        OPTIONAL {{
-            ?subject nb:hasSex ?sex.
-        }}
-        OPTIONAL {{
-            ?subject nb:hasDiagnosis ?diagnosis.
-        }}
-        OPTIONAL {{
-            ?subject nb:isSubjectGroup ?subject_group.
-        }}
-        OPTIONAL {{
-            ?subject nb:hasAssessment ?assessment.
-        }}
-        {{
-            SELECT ?subject (count(distinct ?session) as ?num_sessions)
-            WHERE {{
-                ?subject a nb:Subject;
-                    nb:hasSession ?session.
-                ?session nb:hasAcquisition/nb:hasContrastType ?image_modal.
-                {session_level_filters}
-            }} GROUP BY ?subject
-        }}
-        {subject_level_filters}
-    }}
-    """
-
-    # The query defined above will return all subject-level attributes from the graph. If RETURN_AGG variable has been set to true,
-    # wrap query in an aggregating statement so data returned from graph include only attributes needed for dataset-level aggregate metadata.
-    if return_agg:
-        query_string = f"""
-        SELECT ?dataset_uuid ?dataset_name ?dataset_portal_uri ?sub_id ?session_file_path ?image_modal WHERE {{\n
-        {query_string}
-        \n}} GROUP BY ?dataset_uuid ?dataset_name ?dataset_portal_uri ?sub_id ?session_file_path ?image_modal
-        """
-
-    return "\n".join([DEFAULT_CONTEXT, query_string])
-
-
-def create_terms_query(data_element_URI: str) -> str:
-    """
-    Creates a SPARQL query using a simple query template to retrieve term URLs for a given data element.
-
-    Parameters
-    ----------
-    data_element_URI : str
-        The URI of the data element for which to retrieve the URIs of all connected terms.
-
-    Returns
-    -------
-    str
-        The SPARQL query.
-
-    Examples
-    --------
-    create_terms_query("nb:Assessment")
-    """
-
-    query_string = f"""
-    SELECT DISTINCT ?termURL
-    WHERE {{
-        ?termURL a {data_element_URI}.
-    }}
-    """
-
-    return "\n".join([DEFAULT_CONTEXT, query_string])
+# def create_query(
+#     return_agg: bool,
+#     age: Optional[tuple] = (None, None),
+#     sex: Optional[str] = None,
+#     diagnosis: Optional[str] = None,
+#     is_control: Optional[bool] = None,
+#     min_num_sessions: Optional[int] = None,
+#     assessment: Optional[str] = None,
+#     image_modal: Optional[str] = None,
+# ) -> str:
+#     """
+#     Creates a neurobagel API query using a query template and filters it using the input parameters.
+
+#     Parameters
+#     ----------
+#     return_agg : bool
+#         Whether to return only aggregate query results (and not subject-level attributes besides file paths).
+#     age : tuple, optional
+#         Minimum and maximum age of subject, by default (None, None).
+#     sex : str, optional
+#         Subject sex, by default None.
+#     diagnosis : str, optional
+#         Subject diagnosis, by default None.
+#     is_control : bool, optional
+#         Whether or not subject is a control, by default None.
+#     min_num_sessions : int, optional
+#         Subject minimum number of imaging sessions, by default None.
+#     assessment : str, optional
+#         Non-imaging assessment completed by subjects, by default None.
+#     image_modal : str, optional
+#         Imaging modality of subject scans, by default None.

+#     Returns
+#     -------
+#     str
+#         The Neurobagel API query string.
+#     """
+#     query_string = "?"
+#     return f""
diff --git a/app/main.py b/app/main.py
index 58643a4..20db039 100644
--- a/app/main.py
+++ b/app/main.py
@@ -1,50 +1,33 @@
 """Main app."""
 
-import os
-import warnings
-
 import uvicorn
 from fastapi import FastAPI
-from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import ORJSONResponse
 
-from .api import utility as util
 from .api.routers import query
 
 app = FastAPI(default_response_class=ORJSONResponse)
 
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=util.parse_origins_as_list(util.ALLOWED_ORIGINS.val),
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-
-
-@app.on_event("startup")
-async def auth_check():
-    """Checks whether username and password environment variables are set."""
-    if (
-        # TODO: Check if this error is still raised when variables are empty strings
-        os.environ.get(util.GRAPH_USERNAME.name) is None
-        or os.environ.get(util.GRAPH_PASSWORD.name) is None
-    ):
-        raise RuntimeError(
-            f"The application was launched but could not find the {util.GRAPH_USERNAME.name} and / or {util.GRAPH_PASSWORD.name} environment variables."
-        )
-
-
-@app.on_event("startup")
-async def allowed_origins_check():
-    """Raises warning if allowed origins environment variable has not been set or is an empty string."""
-    if os.environ.get(util.ALLOWED_ORIGINS.name, "") == "":
-        warnings.warn(
-            f"The API was launched without providing any values for the {util.ALLOWED_ORIGINS.name} environment variable. "
-            "This means that the API will only be accessible from the same origin it is hosted from: https://developer.mozilla.org/en-US/docs/Web/Security/Same-origin_policy. "
-            f"If you want to access the API from tools hosted at other origins such as the Neurobagel query tool, explicitly set the value of {util.ALLOWED_ORIGINS.name} to the origin(s) of these tools (e.g. http://localhost:3000). "
-            "Multiple allowed origins should be separated with spaces in a single string enclosed in quotes. "
-        )
+# TODO: bring this back when we spin up the query tool
+# app.add_middleware(
+#     CORSMiddleware,
+#     allow_origins=util.parse_origins_as_list(util.ALLOWED_ORIGINS.val),
+#     allow_credentials=True,
+#     allow_methods=["*"],
+#     allow_headers=["*"],
+# )
+
+
+# @app.on_event("startup")
+# async def allowed_origins_check():
+#     """Raises warning if allowed origins environment variable has not been set or is an empty string."""
+#     if os.environ.get(util.ALLOWED_ORIGINS.name, "") == "":
+#         warnings.warn(
+#             f"The API was launched without providing any values for the {util.ALLOWED_ORIGINS.name} environment variable. "
+#             "This means that the API will only be accessible from the same origin it is hosted from: https://developer.mozilla.org/en-US/docs/Web/Security/Same-origin_policy. "
+#             f"If you want to access the API from tools hosted at other origins such as the Neurobagel query tool, explicitly set the value of {util.ALLOWED_ORIGINS.name} to the origin(s) of these tools (e.g. http://localhost:3000). "
+#             "Multiple allowed origins should be separated with spaces in a single string enclosed in quotes. "
+#         )
 
 app.include_router(query.router)
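The federation loop above calls each node with the synchronous httpx API, so total latency is the sum over all nodes. A hedged sketch of a concurrent fan-out with httpx.AsyncClient and asyncio.gather, reusing the node list and timeout from the code above:

    import asyncio

    import httpx

    NEUROBAGEL_NODES = [
        "http://206.12.99.17:8888/query/",
        "http://206.12.89.194:8000/query/",
    ]

    async def federate(params: dict) -> list:
        # Fire all node requests at once and collect the responses in order.
        async with httpx.AsyncClient(timeout=30.0) as client:
            responses = await asyncio.gather(
                *(client.get(url=node_url, params=params) for node_url in NEUROBAGEL_NODES)
            )
        results = []
        for response in responses:
            response.raise_for_status()
            results.append(response.json())
        return results

    print(asyncio.run(federate({"min_age": 18})))
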
" - ) +# TODO: bring this back when we spin up the query tool +# app.add_middleware( +# CORSMiddleware, +# allow_origins=util.parse_origins_as_list(util.ALLOWED_ORIGINS.val), +# allow_credentials=True, +# allow_methods=["*"], +# allow_headers=["*"], +# ) + + +# @app.on_event("startup") +# async def allowed_origins_check(): +# """Raises warning if allowed origins environment variable has not been set or is an empty string.""" +# if os.environ.get(util.ALLOWED_ORIGINS.name, "") == "": +# warnings.warn( +# f"The API was launched without providing any values for the {util.ALLOWED_ORIGINS.name} environment variable. " +# "This means that the API will only be accessible from the same origin it is hosted from: https://developer.mozilla.org/en-US/docs/Web/Security/Same-origin_policy. " +# f"If you want to access the API from tools hosted at other origins such as the Neurobagel query tool, explicitly set the value of {util.ALLOWED_ORIGINS.name} to the origin(s) of these tools (e.g. http://localhost:3000). " +# "Multiple allowed origins should be separated with spaces in a single string enclosed in quotes. " +# ) app.include_router(query.router) From 10a3b8248ad5d5ddbbbed140033fcd08f0588f0e Mon Sep 17 00:00:00 2001 From: Arman Jahanpour <77515879+rmanaem@users.noreply.github.com> Date: Thu, 28 Sep 2023 16:55:16 -0400 Subject: [PATCH 3/7] [MNT] Cleaned up the existing code (#3) * Changed router prefix and tag to `query` * Cleaned the good stuff Co-authored-by: Sebastian Urchs Co-authored-by: Alyssa Dai * Updated docstring for `get` function of `crud.py` Co-authored-by: Alyssa Dai --------- Co-authored-by: Sebastian Urchs Co-authored-by: Alyssa Dai Co-authored-by: Alyssa Dai --- app/api/crud.py | 26 +-------- app/api/models.py | 2 - app/api/routers/query.py | 5 +- app/api/utility.py | 110 +-------------------------------------- app/main.py | 28 +++------- 5 files changed, 12 insertions(+), 159 deletions(-) diff --git a/app/api/crud.py b/app/api/crud.py index 40dcaac..c8a4c06 100644 --- a/app/api/crud.py +++ b/app/api/crud.py @@ -5,26 +5,6 @@ from . import utility as util -# Order that dataset and subject-level attributes should appear in the API JSON response. -# This order is defined explicitly because when graph-returned results are transformed to a dataframe, -# the default order of columns may be different than the order that variables are given in the SPARQL SELECT state -ATTRIBUTES_ORDER = [ - "sub_id", - "num_sessions", - "session_id", - "session_file_path", - "age", - "sex", - "diagnosis", - "subject_group", - "assessment", - "image_modal", - "dataset_name", - "dataset_uuid", - "dataset_portal_uri", -] - - async def get( min_age: float, max_age: float, @@ -36,7 +16,7 @@ async def get( image_modal: str, ): """ - Makes a POST request to Stardog API using httpx where the payload is a SPARQL query generated by the create_query function. + Makes GET requests to one or more Neurobagel node APIs using httpx where the parameters are Neurobagel query parameters. 
diff --git a/app/api/models.py b/app/api/models.py
index 99658cd..1c24396 100644
--- a/app/api/models.py
+++ b/app/api/models.py
@@ -1,8 +1,6 @@
 """Data models."""
 from pydantic import BaseModel
 
-CONTROLLED_TERM_REGEX = r"^[a-zA-Z]+[:]\S+$"
-
 
 class QueryModel(BaseModel):
     """Data model and dependency for API that stores the query parameters to be accepted and validated."""
diff --git a/app/api/routers/query.py b/app/api/routers/query.py
index abb826c..e4fad74 100644
--- a/app/api/routers/query.py
+++ b/app/api/routers/query.py
@@ -5,10 +5,7 @@
 from .. import crud
 from ..models import QueryModel
 
-router = APIRouter(prefix="/federate", tags=["federation"])
-
-
-# TODO: update to change the logic once crud is modified
+router = APIRouter(prefix="/query", tags=["query"])
 
 
 @router.get("/")
diff --git a/app/api/utility.py b/app/api/utility.py
index f33f1c2..63058e7 100644
--- a/app/api/utility.py
+++ b/app/api/utility.py
@@ -1,115 +1,7 @@
-"""Constants for Stardog graph connection and utility functions for writing the SPARQL query."""
-
-import os
-from collections import namedtuple
+"""Constants for federation."""
 
 # Neurobagel nodes
 NEUROBAGEL_NODES = [
     "http://206.12.99.17:8888/query/",
     "http://206.12.89.194:8000/query/",
 ]
-
-# Request constants
-EnvVar = namedtuple("EnvVar", ["name", "val"])
-
-ALLOWED_ORIGINS = EnvVar(
-    "NB_API_ALLOWED_ORIGINS", os.environ.get("NB_API_ALLOWED_ORIGINS", "")
-)
-
-GRAPH_USERNAME = EnvVar(
-    "NB_GRAPH_USERNAME", os.environ.get("NB_GRAPH_USERNAME")
-)
-GRAPH_PASSWORD = EnvVar(
-    "NB_GRAPH_PASSWORD", os.environ.get("NB_GRAPH_PASSWORD")
-)
-GRAPH_ADDRESS = EnvVar(
-    "NB_GRAPH_ADDRESS", os.environ.get("NB_GRAPH_ADDRESS", "206.12.99.17")
-)
-GRAPH_DB = EnvVar(
-    "NB_GRAPH_DB", os.environ.get("NB_GRAPH_DB", "test_data/query")
-)
-GRAPH_PORT = EnvVar("NB_GRAPH_PORT", os.environ.get("NB_GRAPH_PORT", 5820))
-# TODO: Environment variables can't be parsed as bool so this is a workaround but isn't ideal.
-# Another option is to switch this to a command-line argument, but that would require changing the
-# Dockerfile also since Uvicorn can't accept custom command-line args.
-RETURN_AGG = EnvVar(
-    "NB_RETURN_AGG", os.environ.get("NB_RETURN_AGG", "True").lower() == "true"
-)
-
-QUERY_URL = f"http://{GRAPH_ADDRESS.val}:{GRAPH_PORT.val}/{GRAPH_DB.val}"
-QUERY_HEADER = {
-    "Content-Type": "application/sparql-query",
-    "Accept": "application/sparql-results+json",
-}
-
-# SPARQL query context
-DEFAULT_CONTEXT = """
-PREFIX cogatlas: <https://www.cognitiveatlas.org/task/id/>
-PREFIX nb: <http://neurobagel.org/vocab/>
-PREFIX nbg: <http://neurobagel.org/graph/>
-PREFIX ncit: <http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#>
-PREFIX nidm: <http://purl.org/nidash/nidm#>
-PREFIX snomed: <http://purl.bioontology.org/ontology/SNOMEDCT/>
-"""
-
-# Store domains in named tuples
-Domain = namedtuple("Domain", ["var", "pred"])
-# Core domains
-AGE = Domain("age", "nb:hasAge")
-SEX = Domain("sex", "nb:hasSex")
-DIAGNOSIS = Domain("diagnosis", "nb:hasDiagnosis")
-IS_CONTROL = Domain("subject_group", "nb:isSubjectGroup")
-ASSESSMENT = Domain("assessment", "nb:hasAssessment")
-IMAGE_MODAL = Domain("image_modal", "nb:hasContrastType")
-PROJECT = Domain("project", "nb:hasSamples")
-
-
-CATEGORICAL_DOMAINS = [SEX, DIAGNOSIS, IMAGE_MODAL, ASSESSMENT]
-
-IS_CONTROL_TERM = "ncit:C94342"
-
-
-def parse_origins_as_list(allowed_origins: str) -> list:
-    """Returns user-defined allowed origins as a list."""
-    return list(allowed_origins.split(" "))
-
-
-# def create_query(
-#     return_agg: bool,
-#     age: Optional[tuple] = (None, None),
-#     sex: Optional[str] = None,
-#     diagnosis: Optional[str] = None,
-#     is_control: Optional[bool] = None,
-#     min_num_sessions: Optional[int] = None,
-#     assessment: Optional[str] = None,
-#     image_modal: Optional[str] = None,
-# ) -> str:
-#     """
-#     Creates a neurobagel API query using a query template and filters it using the input parameters.
-
-#     Parameters
-#     ----------
-#     return_agg : bool
-#         Whether to return only aggregate query results (and not subject-level attributes besides file paths).
-#     age : tuple, optional
-#         Minimum and maximum age of subject, by default (None, None).
-#     sex : str, optional
-#         Subject sex, by default None.
-#     diagnosis : str, optional
-#         Subject diagnosis, by default None.
-#     is_control : bool, optional
-#         Whether or not subject is a control, by default None.
-#     min_num_sessions : int, optional
-#         Subject minimum number of imaging sessions, by default None.
-#     assessment : str, optional
-#         Non-imaging assessment completed by subjects, by default None.
-#     image_modal : str, optional
-#         Imaging modality of subject scans, by default None.

-#     Returns
-#     -------
-#     str
-#         The Neurobagel API query string.
-#     """
-#     query_string = "?"
-#     return f""
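Worth noting what the slimmed-down model and utility modules give up: with the constr patterns and root validators gone, nothing at the federation layer rejects malformed input any longer, and that burden falls on the individual node APIs. A quick sketch of the new permissiveness, assuming the package is importable as app.api.models:

    from app.api.models import QueryModel  # assumes the app package is on the path

    # Both construct without complaint now; validation happens downstream, if at all.
    print(QueryModel(min_age=-5.0))
    print(QueryModel(sex="not-a-controlled-term"))
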
diff --git a/app/main.py b/app/main.py
index 20db039..04f71dc 100644
--- a/app/main.py
+++ b/app/main.py
@@ -3,31 +3,19 @@
 import uvicorn
 from fastapi import FastAPI
 from fastapi.responses import ORJSONResponse
+from fastapi.middleware.cors import CORSMiddleware
 
 from .api.routers import query
 
 app = FastAPI(default_response_class=ORJSONResponse)
 
-# TODO: bring this back when we spin up the query tool
-# app.add_middleware(
-#     CORSMiddleware,
-#     allow_origins=util.parse_origins_as_list(util.ALLOWED_ORIGINS.val),
-#     allow_credentials=True,
-#     allow_methods=["*"],
-#     allow_headers=["*"],
-# )
-
-
-# @app.on_event("startup")
-# async def allowed_origins_check():
-#     """Raises warning if allowed origins environment variable has not been set or is an empty string."""
-#     if os.environ.get(util.ALLOWED_ORIGINS.name, "") == "":
-#         warnings.warn(
-#             f"The API was launched without providing any values for the {util.ALLOWED_ORIGINS.name} environment variable. "
-#             "This means that the API will only be accessible from the same origin it is hosted from: https://developer.mozilla.org/en-US/docs/Web/Security/Same-origin_policy. "
-#             f"If you want to access the API from tools hosted at other origins such as the Neurobagel query tool, explicitly set the value of {util.ALLOWED_ORIGINS.name} to the origin(s) of these tools (e.g. http://localhost:3000). "
-#             "Multiple allowed origins should be separated with spaces in a single string enclosed in quotes. "
-#         )
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
 
 app.include_router(query.router)
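allow_origins=["*"] together with allow_credentials=True is the most permissive CORS posture; if that needs tightening later, the environment-driven origin list from PATCH 1 can be restored without new machinery. A hedged sketch reusing the NB_API_ALLOWED_ORIGINS convention from the earlier utility module:

    import os

    from fastapi import FastAPI
    from fastapi.middleware.cors import CORSMiddleware

    app = FastAPI()

    # Space-separated origins, e.g. "http://localhost:3000 https://query.example.org".
    allowed_origins = os.environ.get("NB_API_ALLOWED_ORIGINS", "").split(" ")

    app.add_middleware(
        CORSMiddleware,
        allow_origins=allowed_origins,
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )
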
" -# f"If you want to access the API from tools hosted at other origins such as the Neurobagel query tool, explicitly set the value of {util.ALLOWED_ORIGINS.name} to the origin(s) of these tools (e.g. http://localhost:3000). " -# "Multiple allowed origins should be separated with spaces in a single string enclosed in quotes. " -# ) +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) app.include_router(query.router) From 1c38dcbd2440e77624e6d5f4131684a57a31c985 Mon Sep 17 00:00:00 2001 From: rmanaem Date: Thu, 28 Sep 2023 17:12:12 -0400 Subject: [PATCH 4/7] Added DOCKERFILE --- Dockerfile | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 Dockerfile diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..b8a17d6 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,14 @@ +FROM python:3.10 + +WORKDIR /usr/src/ + +COPY ./requirements.txt /usr/src/app/requirements.txt + +RUN pip install --no-cache-dir --upgrade -r /usr/src/app/requirements.txt + +COPY ./app /usr/src/app + +# NB_API_PORT, representing the port on which the API will be exposed, +# is an environment variable that will always have a default value of 8000 when building the image +# but can be overridden when running the container. +ENTRYPOINT uvicorn app.main:app --proxy-headers --host 0.0.0.0 --port ${NB_API_PORT:-8000} From 2902cccb6b1eb111cc20c729bf6dbd46898edff5 Mon Sep 17 00:00:00 2001 From: rmanaem Date: Wed, 4 Oct 2023 13:45:30 -0400 Subject: [PATCH 5/7] Updated to point to database --- app/api/utility.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/api/utility.py b/app/api/utility.py index 63058e7..357f4b8 100644 --- a/app/api/utility.py +++ b/app/api/utility.py @@ -2,6 +2,6 @@ # Neurobagel nodes NEUROBAGEL_NODES = [ - "http://206.12.99.17:8888/query/", + "http://206.12.99.17:8000/query/", "http://206.12.89.194:8000/query/", ] From 6e8b9d2ce7249ffd819d98c64d8bc7b9286cfa87 Mon Sep 17 00:00:00 2001 From: Arman Jahanpour <77515879+rmanaem@users.noreply.github.com> Date: Fri, 6 Oct 2023 13:20:23 -0400 Subject: [PATCH 6/7] [ENH] Turned NEUROBAGEL_NODES to an env var (#6) --- app/api/utility.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/app/api/utility.py b/app/api/utility.py index 357f4b8..efe1aa9 100644 --- a/app/api/utility.py +++ b/app/api/utility.py @@ -1,7 +1,6 @@ """Constants for federation.""" +import os + # Neurobagel nodes -NEUROBAGEL_NODES = [ - "http://206.12.99.17:8000/query/", - "http://206.12.89.194:8000/query/", -] +NEUROBAGEL_NODES = os.environ.get("NB_NODES", ["http://206.12.99.17:8000/query/"]) \ No newline at end of file From 6468cecc8f0a7ecc5545763dabd896301dee45e7 Mon Sep 17 00:00:00 2001 From: Arman Jahanpour <77515879+rmanaem@users.noreply.github.com> Date: Tue, 10 Oct 2023 15:19:02 -0400 Subject: [PATCH 7/7] [CI] Set up workflows (#7) * Turned NEUROBAGEL_NODES to an env var * Added dependabot.yaml config file * Added codespell.yaml workflow file * Added lint.yaml workflow file * Added build_docker.yaml workflow file --- .github/dependabot.yaml | 12 ++++++++++ .github/workflows/build_docker.yaml | 32 ++++++++++++++++++++++++++ .github/workflows/codespell.yaml | 19 ++++++++++++++++ .github/workflows/lint.yaml | 35 +++++++++++++++++++++++++++++ 4 files changed, 98 insertions(+) create mode 100644 .github/dependabot.yaml create mode 100644 .github/workflows/build_docker.yaml create mode 100644 .github/workflows/codespell.yaml create mode 100644 
From 6468cecc8f0a7ecc5545763dabd896301dee45e7 Mon Sep 17 00:00:00 2001
From: Arman Jahanpour <77515879+rmanaem@users.noreply.github.com>
Date: Tue, 10 Oct 2023 15:19:02 -0400
Subject: [PATCH 7/7] [CI] Set up workflows (#7)

* Turned NEUROBAGEL_NODES to an env var

* Added dependabot.yaml config file

* Added codespell.yaml workflow file

* Added lint.yaml workflow file

* Added build_docker.yaml workflow file

---
 .github/dependabot.yaml             | 12 ++++++++++
 .github/workflows/build_docker.yaml | 32 ++++++++++++++++++++++++++
 .github/workflows/codespell.yaml    | 19 ++++++++++++++++
 .github/workflows/lint.yaml         | 35 +++++++++++++++++++++++++++++
 4 files changed, 98 insertions(+)
 create mode 100644 .github/dependabot.yaml
 create mode 100644 .github/workflows/build_docker.yaml
 create mode 100644 .github/workflows/codespell.yaml
 create mode 100644 .github/workflows/lint.yaml

diff --git a/.github/dependabot.yaml b/.github/dependabot.yaml
new file mode 100644
index 0000000..c1f7a9d
--- /dev/null
+++ b/.github/dependabot.yaml
@@ -0,0 +1,12 @@
+# Documentation
+# https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
+version: 2
+updates:
+  - package-ecosystem: 'github-actions'
+    directory: '/'
+    schedule:
+      interval: 'weekly'
+    labels:
+      - "_bot"
+      - "maint:dependency"
+      - "type:maintenance"
diff --git a/.github/workflows/build_docker.yaml b/.github/workflows/build_docker.yaml
new file mode 100644
index 0000000..63511cd
--- /dev/null
+++ b/.github/workflows/build_docker.yaml
@@ -0,0 +1,32 @@
+name: build docker
+
+on:
+  push:
+    branches:
+      - main
+  workflow_dispatch:
+
+jobs:
+  build-docker:
+    runs-on: ubuntu-latest
+    steps:
+      -
+        name: Checkout
+        uses: actions/checkout@v4
+      -
+        name: Login to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+      -
+        name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      -
+        name: Build and push
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          file: ./Dockerfile
+          push: true
+          tags: ${{ secrets.DOCKERHUB_USERNAME }}/federation_api:latest
\ No newline at end of file
diff --git a/.github/workflows/codespell.yaml b/.github/workflows/codespell.yaml
new file mode 100644
index 0000000..3bda702
--- /dev/null
+++ b/.github/workflows/codespell.yaml
@@ -0,0 +1,19 @@
+---
+name: Codespell
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+jobs:
+  codespell:
+    name: Check for spelling errors
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Codespell
+        uses: codespell-project/actions-codespell@v2
diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml
new file mode 100644
index 0000000..ab395dc
--- /dev/null
+++ b/.github/workflows/lint.yaml
@@ -0,0 +1,35 @@
+name: lint
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+  workflow_dispatch:
+
+jobs:
+  lint:
+
+    runs-on: ubuntu-latest
+
+    steps:
+
+    - uses: actions/checkout@v4
+
+    - name: Set up Python 3.10
+      uses: actions/setup-python@v4
+      with:
+        python-version: "3.10"
+
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install flake8
+        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+
+    - name: Lint with flake8
+      run: |
+        # stop the build if there are Python syntax errors or undefined names
+        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics --per-file-ignores=./app/api/models.py:F722
+        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
+        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
\ No newline at end of file