From 4b22985c32858ecaf39b1b9cbec614fa33d25819 Mon Sep 17 00:00:00 2001
From: rmanaem
Date: Thu, 28 Sep 2023 10:43:28 -0400
Subject: [PATCH 1/7] Dumped the app directory

---
 app/__init__.py             |   0
 app/api/__init__.py         |   0
 app/api/crud.py             | 195 ++++++++++++++++++++++++++++++
 app/api/models.py           |  62 ++++++++++
 app/api/routers/__init__.py |   0
 app/api/routers/query.py    |  36 ++++++
 app/api/utility.py          | 234 ++++++++++++++++++++++++++++++++++++
 app/main.py                 |  54 +++++++++
 8 files changed, 581 insertions(+)
 create mode 100644 app/__init__.py
 create mode 100644 app/api/__init__.py
 create mode 100644 app/api/crud.py
 create mode 100644 app/api/models.py
 create mode 100644 app/api/routers/__init__.py
 create mode 100644 app/api/routers/query.py
 create mode 100644 app/api/utility.py
 create mode 100644 app/main.py

diff --git a/app/__init__.py b/app/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/app/api/__init__.py b/app/api/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/app/api/crud.py b/app/api/crud.py
new file mode 100644
index 0000000..2b88b6d
--- /dev/null
+++ b/app/api/crud.py
@@ -0,0 +1,195 @@
+"""CRUD functions called by path operations."""
+
+import os
+
+import httpx
+import pandas as pd
+from fastapi import HTTPException, status
+
+from . import utility as util
+from .models import CohortQueryResponse
+
+# Order that dataset and subject-level attributes should appear in the API JSON response.
+# This order is defined explicitly because when graph-returned results are transformed to a dataframe,
+# the default order of columns may be different than the order that variables are given in the SPARQL SELECT statement.
+ATTRIBUTES_ORDER = [
+    "sub_id",
+    "num_sessions",
+    "session_id",
+    "session_file_path",
+    "age",
+    "sex",
+    "diagnosis",
+    "subject_group",
+    "assessment",
+    "image_modal",
+    "dataset_name",
+    "dataset_uuid",
+    "dataset_portal_uri",
+]
+
+
+async def get(
+    min_age: float,
+    max_age: float,
+    sex: str,
+    diagnosis: str,
+    is_control: bool,
+    min_num_sessions: int,
+    assessment: str,
+    image_modal: str,
+):
+    """
+    Makes a POST request to the Stardog API using httpx, where the payload is a SPARQL query generated by the create_query function.
+
+    Parameters
+    ----------
+    min_age : float
+        Minimum age of subject.
+    max_age : float
+        Maximum age of subject.
+    sex : str
+        Sex of subject.
+    diagnosis : str
+        Subject diagnosis.
+    is_control : bool
+        Whether or not subject is a control.
+    min_num_sessions : int
+        Subject minimum number of imaging sessions.
+    assessment : str
+        Non-imaging assessment completed by subjects.
+    image_modal : str
+        Imaging modality of subject scans.
+
+    Returns
+    -------
+    list
+        List of CohortQueryResponse objects, one per dataset matching the query.
+
+    """
+    try:
+        response = httpx.post(
+            url=util.QUERY_URL,
+            content=util.create_query(
+                return_agg=util.RETURN_AGG.val,
+                age=(min_age, max_age),
+                sex=sex,
+                diagnosis=diagnosis,
+                is_control=is_control,
+                min_num_sessions=min_num_sessions,
+                assessment=assessment,
+                image_modal=image_modal,
+            ),
+            headers=util.QUERY_HEADER,
+            auth=httpx.BasicAuth(
+                os.environ.get(util.GRAPH_USERNAME.name),
+                os.environ.get(util.GRAPH_PASSWORD.name),
+            ),
+            # TODO: Revisit timeout value when query performance is improved
+            timeout=30.0,
+        )
+    except httpx.ConnectTimeout as exc:
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail="Timed out while connecting to the server. Please confirm that you are connected to the McGill network and try again.",
+        ) from exc
+
+    if not response.is_success:
+        raise HTTPException(
+            status_code=response.status_code,
+            detail=f"{response.reason_phrase}: {response.text}",
+        )
+
+    results = response.json()
+
+    results_dicts = [
+        {k: v["value"] for k, v in res.items()}
+        for res in results["results"]["bindings"]
+    ]
+    results_df = pd.DataFrame(results_dicts).reindex(columns=ATTRIBUTES_ORDER)
+
+    response_obj = []
+    dataset_cols = ["dataset_uuid", "dataset_name"]
+    if not results_df.empty:
+        for (dataset_uuid, dataset_name), group in results_df.groupby(
+            by=dataset_cols
+        ):
+            if util.RETURN_AGG.val:
+                subject_data = list(group["session_file_path"].dropna())
+            else:
+                subject_data = (
+                    group.drop(dataset_cols, axis=1)
+                    .groupby(by=["sub_id", "session_id"])
+                    .agg(
+                        {
+                            "sub_id": "first",
+                            "session_id": "first",
+                            "num_sessions": "first",
+                            "age": "first",
+                            "sex": "first",
+                            "diagnosis": lambda x: list(set(x)),
+                            "subject_group": "first",
+                            "assessment": lambda x: list(set(x)),
+                            "image_modal": lambda x: list(set(x)),
+                            "session_file_path": "first",
+                        }
+                    )
+                )
+                subject_data = list(subject_data.to_dict("records"))
+
+            response_obj.append(
+                CohortQueryResponse(
+                    dataset_uuid=dataset_uuid,
+                    dataset_name=dataset_name,
+                    dataset_portal_uri=group["dataset_portal_uri"].iloc[0]
+                    if group["dataset_portal_uri"].notna().all()
+                    else None,
+                    num_matching_subjects=group["sub_id"].nunique(),
+                    subject_data=subject_data,
+                    image_modals=list(group["image_modal"].unique()),
+                )
+            )
+
+    return response_obj
+
+
+async def get_terms(data_element_URI: str):
+    """
+    Makes a POST request to the Stardog API using httpx, where the payload is a SPARQL query generated by the create_terms_query function.
+
+    Parameters
+    ----------
+    data_element_URI : str
+        Controlled term of neurobagel class for which all the available terms should be retrieved.
+
+    Returns
+    -------
+    dict
+        Dictionary with a single key (the data element URI) mapping to the list of available term URLs.
+
+    """
+    response = httpx.post(
+        url=util.QUERY_URL,
+        content=util.create_terms_query(data_element_URI),
+        headers=util.QUERY_HEADER,
+        auth=httpx.BasicAuth(
+            os.environ.get(util.GRAPH_USERNAME.name),
+            os.environ.get(util.GRAPH_PASSWORD.name),
+        ),
+    )
+
+    if not response.is_success:
+        raise HTTPException(
+            status_code=response.status_code,
+            detail=f"{response.reason_phrase}: {response.text}",
+        )
+
+    results = response.json()
+
+    results_dict = {
+        data_element_URI: [
+            result["termURL"]["value"]
+            for result in results["results"]["bindings"]
+        ]
+    }
+
+    return results_dict
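Note on the results handling above: each SPARQL binding is a dict of {variable: {"type": ..., "value": ...}} objects, so the nested comprehension keeps only the "value" of every bound variable before building the dataframe. A small self-contained sketch of that transformation (the payload below is a made-up example of the standard SPARQL JSON results format, not real graph output):

    import pandas as pd

    # Hypothetical response body in the SPARQL JSON results format.
    results = {
        "results": {
            "bindings": [
                {
                    "sub_id": {"type": "literal", "value": "sub-01"},
                    "age": {"type": "literal", "value": "24.0"},
                    "dataset_name": {"type": "literal", "value": "Example Dataset"},
                }
            ]
        }
    }

    results_dicts = [
        {k: v["value"] for k, v in res.items()}
        for res in results["results"]["bindings"]
    ]

    # Variables that were not bound for a row come back as NaN after reindexing,
    # which is why the explicit ATTRIBUTES_ORDER reindex above is safe.
    df = pd.DataFrame(results_dicts).reindex(
        columns=["sub_id", "age", "sex", "dataset_name"]
    )
    print(df)
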
+ """ + response = httpx.post( + url=util.QUERY_URL, + content=util.create_terms_query(data_element_URI), + headers=util.QUERY_HEADER, + auth=httpx.BasicAuth( + os.environ.get(util.GRAPH_USERNAME.name), + os.environ.get(util.GRAPH_PASSWORD.name), + ), + ) + + if not response.is_success: + raise HTTPException( + status_code=response.status_code, + detail=f"{response.reason_phrase}: {response.text}", + ) + + results = response.json() + + results_dict = { + data_element_URI: [ + result["termURL"]["value"] + for result in results["results"]["bindings"] + ] + } + + return results_dict diff --git a/app/api/models.py b/app/api/models.py new file mode 100644 index 0000000..f1a31ff --- /dev/null +++ b/app/api/models.py @@ -0,0 +1,62 @@ +"""Data models.""" + +from typing import Optional + +from fastapi import Query +from fastapi.exceptions import HTTPException +from pydantic import BaseModel, constr, root_validator + +CONTROLLED_TERM_REGEX = r"^[a-zA-Z]+[:]\S+$" + + +class QueryModel(BaseModel): + """Data model and dependency for API that stores the query parameters to be accepted and validated.""" + + min_age: float = Query(default=None, ge=0) + max_age: float = Query(default=None, ge=0) + sex: constr(regex=CONTROLLED_TERM_REGEX) = None + diagnosis: constr(regex=CONTROLLED_TERM_REGEX) = None + is_control: bool = None + min_num_sessions: int = Query(default=None, ge=1) + assessment: constr(regex=CONTROLLED_TERM_REGEX) = None + image_modal: constr(regex=CONTROLLED_TERM_REGEX) = None + + @root_validator() + def check_maxage_ge_minage(cls, values): + """ + If both age bounds have been set to values other than their defaults (None), ensure that max_age is >= min_age. + NOTE: HTTPException (and not ValueError) raised here to get around "Internal Server Error" raised by + FastAPI when a validation error comes from a Pydantic validator inside a class dependency. + See: + https://github.com/tiangolo/fastapi/issues/1474 + https://github.com/tiangolo/fastapi/discussions/3426 + https://fastapi.tiangolo.com/tutorial/handling-errors/?h=validation#requestvalidationerror-vs-validationerror + """ + mina, maxa = values["min_age"], values["max_age"] + if mina is not None and maxa is not None and (maxa < mina): + raise HTTPException( + status_code=422, + detail="'max_age' must be greater than or equal to 'min_age'", + ) + return values + + @root_validator + def check_exclusive_diagnosis_or_ctrl(cls, values): + if values["diagnosis"] is not None and values["is_control"]: + raise HTTPException( + status_code=422, + detail="Subjects cannot both be healthy controls and have a diagnosis.", + ) + return values + + +class CohortQueryResponse(BaseModel): + """Data model for query results for one matching dataset (i.e., a cohort).""" + + dataset_uuid: str + # dataset_file_path: str # TODO: Revisit this field once we have datasets without imaging info/sessions. + dataset_name: str + dataset_portal_uri: Optional[str] + num_matching_subjects: int + subject_data: list + image_modals: list diff --git a/app/api/routers/__init__.py b/app/api/routers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/api/routers/query.py b/app/api/routers/query.py new file mode 100644 index 0000000..9e7db97 --- /dev/null +++ b/app/api/routers/query.py @@ -0,0 +1,36 @@ +"""Router for query path operations.""" + +from typing import List + +from fastapi import APIRouter, Depends +from pydantic import constr + +from .. 
diff --git a/app/api/routers/__init__.py b/app/api/routers/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/app/api/routers/query.py b/app/api/routers/query.py
new file mode 100644
index 0000000..9e7db97
--- /dev/null
+++ b/app/api/routers/query.py
@@ -0,0 +1,36 @@
+"""Router for query path operations."""
+
+from typing import List
+
+from fastapi import APIRouter, Depends
+from pydantic import constr
+
+from .. import crud
+from ..models import CONTROLLED_TERM_REGEX, CohortQueryResponse, QueryModel
+
+router = APIRouter(prefix="/query", tags=["query"])
+
+
+@router.get("/", response_model=List[CohortQueryResponse])
+async def get_query(query: QueryModel = Depends(QueryModel)):
+    """When a GET request is sent, return list of dicts corresponding to subject-level metadata aggregated by dataset."""
+    response = await crud.get(
+        query.min_age,
+        query.max_age,
+        query.sex,
+        query.diagnosis,
+        query.is_control,
+        query.min_num_sessions,
+        query.assessment,
+        query.image_modal,
+    )
+
+    return response
+
+
+@router.get("/attributes/{data_element_URI}")
+async def get_terms(data_element_URI: constr(regex=CONTROLLED_TERM_REGEX)):
+    """When a GET request is sent, return a dict with the only key corresponding to the controlled term of a neurobagel class and the value corresponding to all the available terms."""
+    response = await crud.get_terms(data_element_URI)
+
+    return response
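With the router mounted at /query, all filtering happens through query parameters on a single GET endpoint. A hedged usage sketch (the host and port are assumptions for illustration):

    import httpx

    # Assumes a locally running instance of the API.
    response = httpx.get(
        "http://localhost:8000/query/",
        params={"min_age": 18, "max_age": 35, "min_num_sessions": 2},
    )
    response.raise_for_status()
    for cohort in response.json():
        print(cohort["dataset_name"], cohort["num_matching_subjects"])
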
diff --git a/app/api/utility.py b/app/api/utility.py
new file mode 100644
index 0000000..ba3ef1f
--- /dev/null
+++ b/app/api/utility.py
@@ -0,0 +1,234 @@
+"""Constants for Stardog graph connection and utility functions for writing the SPARQL query."""
+
+import os
+from collections import namedtuple
+from typing import Optional
+
+# Request constants
+EnvVar = namedtuple("EnvVar", ["name", "val"])
+
+ALLOWED_ORIGINS = EnvVar(
+    "NB_API_ALLOWED_ORIGINS", os.environ.get("NB_API_ALLOWED_ORIGINS", "")
+)
+
+GRAPH_USERNAME = EnvVar(
+    "NB_GRAPH_USERNAME", os.environ.get("NB_GRAPH_USERNAME")
+)
+GRAPH_PASSWORD = EnvVar(
+    "NB_GRAPH_PASSWORD", os.environ.get("NB_GRAPH_PASSWORD")
+)
+GRAPH_ADDRESS = EnvVar(
+    "NB_GRAPH_ADDRESS", os.environ.get("NB_GRAPH_ADDRESS", "206.12.99.17")
+)
+GRAPH_DB = EnvVar(
+    "NB_GRAPH_DB", os.environ.get("NB_GRAPH_DB", "test_data/query")
+)
+GRAPH_PORT = EnvVar("NB_GRAPH_PORT", os.environ.get("NB_GRAPH_PORT", 5820))
+# TODO: Environment variables can't be parsed as bool so this is a workaround but isn't ideal.
+# Another option is to switch this to a command-line argument, but that would require changing the
+# Dockerfile also since Uvicorn can't accept custom command-line args.
+RETURN_AGG = EnvVar(
+    "NB_RETURN_AGG", os.environ.get("NB_RETURN_AGG", "True").lower() == "true"
+)
+
+QUERY_URL = f"http://{GRAPH_ADDRESS.val}:{GRAPH_PORT.val}/{GRAPH_DB.val}"
+QUERY_HEADER = {
+    "Content-Type": "application/sparql-query",
+    "Accept": "application/sparql-results+json",
+}
+
+# SPARQL query context
+DEFAULT_CONTEXT = """
+PREFIX cogatlas: <https://www.cognitiveatlas.org/task/id/>
+PREFIX nb: <http://neurobagel.org/vocab/>
+PREFIX nbg: <http://neurobagel.org/graph/>
+PREFIX ncit: <http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#>
+PREFIX nidm: <http://purl.org/nidash/nidm#>
+PREFIX snomed: <http://purl.bioontology.org/ontology/SNOMEDCT/>
+"""
+
+# Store domains in named tuples
+Domain = namedtuple("Domain", ["var", "pred"])
+# Core domains
+AGE = Domain("age", "nb:hasAge")
+SEX = Domain("sex", "nb:hasSex")
+DIAGNOSIS = Domain("diagnosis", "nb:hasDiagnosis")
+IS_CONTROL = Domain("subject_group", "nb:isSubjectGroup")
+ASSESSMENT = Domain("assessment", "nb:hasAssessment")
+IMAGE_MODAL = Domain("image_modal", "nb:hasContrastType")
+PROJECT = Domain("project", "nb:hasSamples")
+
+
+CATEGORICAL_DOMAINS = [SEX, DIAGNOSIS, IMAGE_MODAL, ASSESSMENT]
+
+IS_CONTROL_TERM = "ncit:C94342"
+
+
+def parse_origins_as_list(allowed_origins: str) -> list:
+    """Returns user-defined allowed origins as a list."""
+    return list(allowed_origins.split(" "))
+
+
+def create_query(
+    return_agg: bool,
+    age: Optional[tuple] = (None, None),
+    sex: Optional[str] = None,
+    diagnosis: Optional[str] = None,
+    is_control: Optional[bool] = None,
+    min_num_sessions: Optional[int] = None,
+    assessment: Optional[str] = None,
+    image_modal: Optional[str] = None,
+) -> str:
+    """
+    Creates a SPARQL query using a query template and filters it using the input parameters.
+
+    Parameters
+    ----------
+    return_agg : bool
+        Whether to return only aggregate query results (and not subject-level attributes besides file paths).
+    age : tuple, optional
+        Minimum and maximum age of subject, by default (None, None).
+    sex : str, optional
+        Subject sex, by default None.
+    diagnosis : str, optional
+        Subject diagnosis, by default None.
+    is_control : bool, optional
+        Whether or not subject is a control, by default None.
+    min_num_sessions : int, optional
+        Subject minimum number of imaging sessions, by default None.
+    assessment : str, optional
+        Non-imaging assessment completed by subjects, by default None.
+    image_modal : str, optional
+        Imaging modality of subject scans, by default None.
+
+    Returns
+    -------
+    str
+        The SPARQL query.
+    """
+    subject_level_filters = ""
+
+    if age[0] is not None:
+        subject_level_filters += "\n" + f"FILTER (?{AGE.var} >= {age[0]})."
+    if age[1] is not None:
+        subject_level_filters += "\n" + f"FILTER (?{AGE.var} <= {age[1]})."
+
+    if sex is not None:
+        subject_level_filters += "\n" + f"FILTER (?{SEX.var} = {sex})."
+
+    if diagnosis is not None:
+        subject_level_filters += (
+            "\n" + f"FILTER (?{DIAGNOSIS.var} = {diagnosis})."
+        )
+
+    if is_control is not None:
+        if is_control:
+            subject_level_filters += (
+                "\n" + f"FILTER (?{IS_CONTROL.var} = {IS_CONTROL_TERM})."
+            )
+        else:
+            subject_level_filters += (
+                "\n" + f"FILTER (?{IS_CONTROL.var} != {IS_CONTROL_TERM})."
+            )
+
+    if min_num_sessions is not None:
+        subject_level_filters += (
+            "\n" + f"FILTER (?num_sessions >= {min_num_sessions})."
+        )
+
+    if assessment is not None:
+        subject_level_filters += (
+            "\n" + f"FILTER (?{ASSESSMENT.var} = {assessment})."
+        )
+
+    session_level_filters = ""
+
+    if image_modal is not None:
+        session_level_filters += (
+            "\n" + f"FILTER (?{IMAGE_MODAL.var} = {image_modal})."
+        )
+
+    query_string = f"""
+    SELECT DISTINCT ?dataset_uuid ?dataset_name ?dataset_portal_uri ?sub_id ?age ?sex
+    ?diagnosis ?subject_group ?num_sessions ?session_id ?assessment ?image_modal ?session_file_path
+    WHERE {{
+        ?dataset_uuid a nb:Dataset;
+            nb:hasLabel ?dataset_name;
+            nb:hasSamples ?subject.
+        ?subject a nb:Subject;
+            nb:hasLabel ?sub_id;
+            nb:hasSession ?session;
+            nb:hasSession/nb:hasAcquisition/nb:hasContrastType ?image_modal.
+        ?session nb:hasLabel ?session_id.
+        OPTIONAL {{
+            ?dataset_uuid nb:hasPortalURI ?dataset_portal_uri.
+        }}
+        OPTIONAL {{
+            ?session nb:hasFilePath ?session_file_path.
+        }}
+        OPTIONAL {{
+            ?subject nb:hasAge ?age.
+        }}
+        OPTIONAL {{
+            ?subject nb:hasSex ?sex.
+        }}
+        OPTIONAL {{
+            ?subject nb:hasDiagnosis ?diagnosis.
+        }}
+        OPTIONAL {{
+            ?subject nb:isSubjectGroup ?subject_group.
+        }}
+        OPTIONAL {{
+            ?subject nb:hasAssessment ?assessment.
+        }}
+        {{
+            SELECT ?subject (count(distinct ?session) as ?num_sessions)
+            WHERE {{
+                ?subject a nb:Subject;
+                    nb:hasSession ?session.
+                ?session nb:hasAcquisition/nb:hasContrastType ?image_modal.
+                {session_level_filters}
+            }} GROUP BY ?subject
+        }}
+        {subject_level_filters}
+    }}
+    """
+
+    # The query defined above will return all subject-level attributes from the graph. If RETURN_AGG variable has been set to true,
+    # wrap query in an aggregating statement so data returned from graph include only attributes needed for dataset-level aggregate metadata.
+    if return_agg:
+        query_string = f"""
+        SELECT ?dataset_uuid ?dataset_name ?dataset_portal_uri ?sub_id ?session_file_path ?image_modal WHERE {{\n
+        {query_string}
+        \n}} GROUP BY ?dataset_uuid ?dataset_name ?dataset_portal_uri ?sub_id ?session_file_path ?image_modal
+        """
+
+    return "\n".join([DEFAULT_CONTEXT, query_string])
+
+
+def create_terms_query(data_element_URI: str) -> str:
+    """
+    Creates a SPARQL query using a simple query template to retrieve term URLs for a given data element.
+
+    Parameters
+    ----------
+    data_element_URI : str
+        The URI of the data element for which to retrieve the URIs of all connected terms.
+
+    Returns
+    -------
+    str
+        The SPARQL query.
+
+    Examples
+    --------
+    create_terms_query("nb:Assessment")
+    """
+
+    query_string = f"""
+    SELECT DISTINCT ?termURL
+    WHERE {{
+        ?termURL a {data_element_URI}.
+    }}
+    """
+
+    return "\n".join([DEFAULT_CONTEXT, query_string])
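The generated SPARQL is easiest to understand by looking at what create_query emits for a small set of filters. A sketch, assuming the module is importable as app.api.utility:

    from app.api import utility as util  # assumes the app package is on the path

    query = util.create_query(
        return_agg=False,
        age=(18.0, None),
        min_num_sessions=2,
    )
    # The subject-level block of the printed query now contains:
    #   FILTER (?age >= 18.0).
    #   FILTER (?num_sessions >= 2).
    print(query)
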
diff --git a/app/main.py b/app/main.py
new file mode 100644
index 0000000..58643a4
--- /dev/null
+++ b/app/main.py
@@ -0,0 +1,54 @@
+"""Main app."""
+
+import os
+import warnings
+
+import uvicorn
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import ORJSONResponse
+
+from .api import utility as util
+from .api.routers import query
+
+app = FastAPI(default_response_class=ORJSONResponse)
+
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=util.parse_origins_as_list(util.ALLOWED_ORIGINS.val),
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+
+@app.on_event("startup")
+async def auth_check():
+    """Checks whether username and password environment variables are set."""
+    if (
+        # TODO: Check if this error is still raised when variables are empty strings
+        os.environ.get(util.GRAPH_USERNAME.name) is None
+        or os.environ.get(util.GRAPH_PASSWORD.name) is None
+    ):
+        raise RuntimeError(
+            f"The application was launched but could not find the {util.GRAPH_USERNAME.name} and / or {util.GRAPH_PASSWORD.name} environment variables."
+        )
+
+
+@app.on_event("startup")
+async def allowed_origins_check():
+    """Raises warning if allowed origins environment variable has not been set or is an empty string."""
+    if os.environ.get(util.ALLOWED_ORIGINS.name, "") == "":
+        warnings.warn(
+            f"The API was launched without providing any values for the {util.ALLOWED_ORIGINS.name} environment variable. "
+            "This means that the API will only be accessible from the same origin it is hosted from: https://developer.mozilla.org/en-US/docs/Web/Security/Same-origin_policy. "
+            f"If you want to access the API from tools hosted at other origins such as the Neurobagel query tool, explicitly set the value of {util.ALLOWED_ORIGINS.name} to the origin(s) of these tools (e.g. http://localhost:3000). "
+            "Multiple allowed origins should be separated with spaces in a single string enclosed in quotes. "
+        )
+
+
+app.include_router(query.router)
+
+# Automatically start uvicorn server on execution of main.py
+if __name__ == "__main__":
+    uvicorn.run("app.main:app", port=8000, reload=True)
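Because NB_API_ALLOWED_ORIGINS is read as one space-separated string, the helper in utility.py is what turns it into the list CORSMiddleware expects. A tiny sketch of that round trip (the origin URLs are placeholders):

    from app.api import utility as util  # assumes the app package is on the path

    print(util.parse_origins_as_list("http://localhost:3000 https://query.example.org"))
    # ['http://localhost:3000', 'https://query.example.org']
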
From 596336945f54c4204b8da020d40d5cc95317e630 Mon Sep 17 00:00:00 2001
From: Sebastian Urchs
Date: Thu, 28 Sep 2023 12:10:27 -0400
Subject: [PATCH 2/7] [ENH] Very inspired first federation prototype (#1)

* set up route and crud function for basic federation request

Still the same

* Yay, federation!

Co-authored-by: Alyssa Dai
Co-authored-by: rmanaem
---
 app/api/crud.py          | 154 ++++++-----------------------
 app/api/models.py        |  61 ++----------
 app/api/routers/query.py |  20 ++--
 app/api/utility.py       | 209 +++++++++------------------------------
 app/main.py              |  57 ++++-------
 5 files changed, 110 insertions(+), 391 deletions(-)

diff --git a/app/api/crud.py b/app/api/crud.py
index 2b88b6d..40dcaac 100644
--- a/app/api/crud.py
+++ b/app/api/crud.py
@@ -1,13 +1,9 @@
 """CRUD functions called by path operations."""
 
-import os
-
 import httpx
-import pandas as pd
-from fastapi import HTTPException, status
+from fastapi import HTTPException
 
 from . import utility as util
-from .models import CohortQueryResponse
 
 # Order that dataset and subject-level attributes should appear in the API JSON response.
 # This order is defined explicitly because when graph-returned results are transformed to a dataframe,
 # the default order of columns may be different than the order that variables are given in the SPARQL SELECT statement.
@@ -67,129 +63,41 @@ async def get(
         List of CohortQueryResponse objects, one per dataset matching the query.
 
     """
-    try:
-        response = httpx.post(
-            url=util.QUERY_URL,
-            content=util.create_query(
-                return_agg=util.RETURN_AGG.val,
-                age=(min_age, max_age),
-                sex=sex,
-                diagnosis=diagnosis,
-                is_control=is_control,
-                min_num_sessions=min_num_sessions,
-                assessment=assessment,
-                image_modal=image_modal,
-            ),
-            headers=util.QUERY_HEADER,
-            auth=httpx.BasicAuth(
-                os.environ.get(util.GRAPH_USERNAME.name),
-                os.environ.get(util.GRAPH_PASSWORD.name),
-            ),
+    cross_node_results = []
+    params = {}
+    if min_age:
+        params["min_age"] = min_age
+    if max_age:
+        params["max_age"] = max_age
+    if sex:
+        params["sex"] = sex
+    if diagnosis:
+        params["diagnosis"] = diagnosis
+    if is_control:
+        params["is_control"] = is_control
+    if min_num_sessions:
+        params["min_num_sessions"] = min_num_sessions
+    if assessment:
+        params["assessment"] = assessment
+    if image_modal:
+        params["image_modal"] = image_modal
+
+    for node_url in util.NEUROBAGEL_NODES:
+        response = httpx.get(
+            url=node_url,
+            params=params,
             # TODO: Revisit timeout value when query performance is improved
             timeout=30.0,
         )
-    except httpx.ConnectTimeout as exc:
-        raise HTTPException(
-            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail="Timed out while connecting to the server. Please confirm that you are connected to the McGill network and try again.",
-        ) from exc
-
-    if not response.is_success:
-        raise HTTPException(
-            status_code=response.status_code,
-            detail=f"{response.reason_phrase}: {response.text}",
-        )
-
-    results = response.json()
+        print("request was: ", response.request.url)
 
-    results_dicts = [
-        {k: v["value"] for k, v in res.items()}
-        for res in results["results"]["bindings"]
-    ]
-    results_df = pd.DataFrame(results_dicts).reindex(columns=ATTRIBUTES_ORDER)
-
-    response_obj = []
-    dataset_cols = ["dataset_uuid", "dataset_name"]
-    if not results_df.empty:
-        for (dataset_uuid, dataset_name), group in results_df.groupby(
-            by=dataset_cols
-        ):
-            if util.RETURN_AGG.val:
-                subject_data = list(group["session_file_path"].dropna())
-            else:
-                subject_data = (
-                    group.drop(dataset_cols, axis=1)
-                    .groupby(by=["sub_id", "session_id"])
-                    .agg(
-                        {
-                            "sub_id": "first",
-                            "session_id": "first",
-                            "num_sessions": "first",
-                            "age": "first",
-                            "sex": "first",
-                            "diagnosis": lambda x: list(set(x)),
-                            "subject_group": "first",
-                            "assessment": lambda x: list(set(x)),
-                            "image_modal": lambda x: list(set(x)),
-                            "session_file_path": "first",
-                        }
-                    )
-                )
-                subject_data = list(subject_data.to_dict("records"))
-
-            response_obj.append(
-                CohortQueryResponse(
-                    dataset_uuid=dataset_uuid,
-                    dataset_name=dataset_name,
-                    dataset_portal_uri=group["dataset_portal_uri"].iloc[0]
-                    if group["dataset_portal_uri"].notna().all()
-                    else None,
-                    num_matching_subjects=group["sub_id"].nunique(),
-                    subject_data=subject_data,
-                    image_modals=list(group["image_modal"].unique()),
-                )
+        if not response.is_success:
+            raise HTTPException(
+                status_code=response.status_code,
+                detail=f"{response.reason_phrase}: {response.text}",
             )
-
-    return response_obj
-
-
-async def get_terms(data_element_URI: str):
-    """
-    Makes a POST request to the Stardog API using httpx, where the payload is a SPARQL query generated by the create_terms_query function.
-
-    Parameters
-    ----------
-    data_element_URI : str
-        Controlled term of neurobagel class for which all the available terms should be retrieved.
-
-    Returns
-    -------
-    dict
-        Dictionary with a single key (the data element URI) mapping to the list of available term URLs.
-
-    """
-    response = httpx.post(
-        url=util.QUERY_URL,
-        content=util.create_terms_query(data_element_URI),
-        headers=util.QUERY_HEADER,
-        auth=httpx.BasicAuth(
-            os.environ.get(util.GRAPH_USERNAME.name),
-            os.environ.get(util.GRAPH_PASSWORD.name),
-        ),
-    )
-
-    if not response.is_success:
-        raise HTTPException(
-            status_code=response.status_code,
-            detail=f"{response.reason_phrase}: {response.text}",
-        )
-
-    results = response.json()
-
-    results_dict = {
-        data_element_URI: [
-            result["termURL"]["value"]
-            for result in results["results"]["bindings"]
-        ]
-    }
+        cross_node_results.append(response.json())
 
-    return results_dict
+    return cross_node_results
diff --git a/app/api/models.py b/app/api/models.py
index f1a31ff..99658cd 100644
--- a/app/api/models.py
+++ b/app/api/models.py
@@ -1,10 +1,5 @@
 """Data models."""
-
-from typing import Optional
-
-from fastapi import Query
-from fastapi.exceptions import HTTPException
-from pydantic import BaseModel, constr, root_validator
+from pydantic import BaseModel
 
 CONTROLLED_TERM_REGEX = r"^[a-zA-Z]+[:]\S+$"
 
@@ -12,51 +7,11 @@ class QueryModel(BaseModel):
     """Data model and dependency for API that stores the query parameters to be accepted and validated."""
 
-    min_age: float = Query(default=None, ge=0)
-    max_age: float = Query(default=None, ge=0)
-    sex: constr(regex=CONTROLLED_TERM_REGEX) = None
-    diagnosis: constr(regex=CONTROLLED_TERM_REGEX) = None
+    min_age: float = None
+    max_age: float = None
+    sex: str = None
+    diagnosis: str = None
     is_control: bool = None
-    min_num_sessions: int = Query(default=None, ge=1)
-    assessment: constr(regex=CONTROLLED_TERM_REGEX) = None
-    image_modal: constr(regex=CONTROLLED_TERM_REGEX) = None
-
-    @root_validator()
-    def check_maxage_ge_minage(cls, values):
-        """
-        If both age bounds have been set to values other than their defaults (None), ensure that max_age is >= min_age.
-
-        NOTE: HTTPException (and not ValueError) raised here to get around "Internal Server Error" raised by
-        FastAPI when a validation error comes from a Pydantic validator inside a class dependency.
-        See:
-        https://github.com/tiangolo/fastapi/issues/1474
-        https://github.com/tiangolo/fastapi/discussions/3426
-        https://fastapi.tiangolo.com/tutorial/handling-errors/?h=validation#requestvalidationerror-vs-validationerror
-        """
-        mina, maxa = values["min_age"], values["max_age"]
-        if mina is not None and maxa is not None and (maxa < mina):
-            raise HTTPException(
-                status_code=422,
-                detail="'max_age' must be greater than or equal to 'min_age'",
-            )
-        return values
-
-    @root_validator
-    def check_exclusive_diagnosis_or_ctrl(cls, values):
-        if values["diagnosis"] is not None and values["is_control"]:
-            raise HTTPException(
-                status_code=422,
-                detail="Subjects cannot both be healthy controls and have a diagnosis.",
-            )
-        return values
-
-
-class CohortQueryResponse(BaseModel):
-    """Data model for query results for one matching dataset (i.e., a cohort)."""
-
-    dataset_uuid: str
-    # dataset_file_path: str  # TODO: Revisit this field once we have datasets without imaging info/sessions.
-    dataset_name: str
-    dataset_portal_uri: Optional[str]
-    num_matching_subjects: int
-    subject_data: list
-    image_modals: list
+    min_num_sessions: int = None
+    assessment: str = None
+    image_modal: str = None
diff --git a/app/api/routers/query.py b/app/api/routers/query.py
index 9e7db97..abb826c 100644
--- a/app/api/routers/query.py
+++ b/app/api/routers/query.py
@@ -1,17 +1,17 @@
 """Router for query path operations."""
 
-from typing import List
-
 from fastapi import APIRouter, Depends
-from pydantic import constr
 
 from .. import crud
-from ..models import CONTROLLED_TERM_REGEX, CohortQueryResponse, QueryModel
+from ..models import QueryModel
+
+router = APIRouter(prefix="/federate", tags=["federation"])
 
-router = APIRouter(prefix="/query", tags=["query"])
+
+# TODO: update to change the logic once crud is modified
 
 
-@router.get("/", response_model=List[CohortQueryResponse])
+@router.get("/")
 async def get_query(query: QueryModel = Depends(QueryModel)):
     """When a GET request is sent, return list of dicts corresponding to subject-level metadata aggregated by dataset."""
     response = await crud.get(
@@ -26,11 +26,3 @@ async def get_query(query: QueryModel = Depends(QueryModel)):
     )
 
     return response
-
-
-@router.get("/attributes/{data_element_URI}")
-async def get_terms(data_element_URI: constr(regex=CONTROLLED_TERM_REGEX)):
-    """When a GET request is sent, return a dict with the only key corresponding to the controlled term of a neurobagel class and the value corresponding to all the available terms."""
-    response = await crud.get_terms(data_element_URI)
-
-    return response
diff --git a/app/api/utility.py b/app/api/utility.py
index ba3ef1f..f33f1c2 100644
--- a/app/api/utility.py
+++ b/app/api/utility.py
@@ -2,7 +2,12 @@
 
 import os
 from collections import namedtuple
-from typing import Optional
+
+# Neurobagel nodes
+NEUROBAGEL_NODES = [
+    "http://206.12.99.17:8888/query/",
+    "http://206.12.89.194:8000/query/",
+]
 
 # Request constants
 EnvVar = namedtuple("EnvVar", ["name", "val"])
@@ -69,166 +74,42 @@ def parse_origins_as_list(allowed_origins: str) -> list:
     return list(allowed_origins.split(" "))
 
 
-def create_query(
-    return_agg: bool,
-    age: Optional[tuple] = (None, None),
-    sex: Optional[str] = None,
-    diagnosis: Optional[str] = None,
-    is_control: Optional[bool] = None,
-    min_num_sessions: Optional[int] = None,
-    assessment: Optional[str] = None,
-    image_modal: Optional[str] = None,
-) -> str:
-    """
-    Creates a SPARQL query using a query template and filters it using the input parameters.
-
-    Parameters
-    ----------
-    return_agg : bool
-        Whether to return only aggregate query results (and not subject-level attributes besides file paths).
-    age : tuple, optional
-        Minimum and maximum age of subject, by default (None, None).
-    sex : str, optional
-        Subject sex, by default None.
-    diagnosis : str, optional
-        Subject diagnosis, by default None.
-    is_control : bool, optional
-        Whether or not subject is a control, by default None.
-    min_num_sessions : int, optional
-        Subject minimum number of imaging sessions, by default None.
-    assessment : str, optional
-        Non-imaging assessment completed by subjects, by default None.
-    image_modal : str, optional
-        Imaging modality of subject scans, by default None.
-
-    Returns
-    -------
-    str
-        The SPARQL query.
-    """
-    subject_level_filters = ""
-
-    if age[0] is not None:
-        subject_level_filters += "\n" + f"FILTER (?{AGE.var} >= {age[0]})."
-    if age[1] is not None:
-        subject_level_filters += "\n" + f"FILTER (?{AGE.var} <= {age[1]})."
-
-    if sex is not None:
-        subject_level_filters += "\n" + f"FILTER (?{SEX.var} = {sex})."
-
-    if diagnosis is not None:
-        subject_level_filters += (
-            "\n" + f"FILTER (?{DIAGNOSIS.var} = {diagnosis})."
-        )
-
-    if is_control is not None:
-        if is_control:
-            subject_level_filters += (
-                "\n" + f"FILTER (?{IS_CONTROL.var} = {IS_CONTROL_TERM})."
-            )
-        else:
-            subject_level_filters += (
-                "\n" + f"FILTER (?{IS_CONTROL.var} != {IS_CONTROL_TERM})."
-            )
-
-    if min_num_sessions is not None:
-        subject_level_filters += (
-            "\n" + f"FILTER (?num_sessions >= {min_num_sessions})."
-        )
-
-    if assessment is not None:
-        subject_level_filters += (
-            "\n" + f"FILTER (?{ASSESSMENT.var} = {assessment})."
-        )
-
-    session_level_filters = ""
-
-    if image_modal is not None:
-        session_level_filters += (
-            "\n" + f"FILTER (?{IMAGE_MODAL.var} = {image_modal})."
-        )
-
-    query_string = f"""
-    SELECT DISTINCT ?dataset_uuid ?dataset_name ?dataset_portal_uri ?sub_id ?age ?sex
-    ?diagnosis ?subject_group ?num_sessions ?session_id ?assessment ?image_modal ?session_file_path
-    WHERE {{
-        ?dataset_uuid a nb:Dataset;
-            nb:hasLabel ?dataset_name;
-            nb:hasSamples ?subject.
-        ?subject a nb:Subject;
-            nb:hasLabel ?sub_id;
-            nb:hasSession ?session;
-            nb:hasSession/nb:hasAcquisition/nb:hasContrastType ?image_modal.
-        ?session nb:hasLabel ?session_id.
-        OPTIONAL {{
-            ?dataset_uuid nb:hasPortalURI ?dataset_portal_uri.
-        }}
-        OPTIONAL {{
-            ?session nb:hasFilePath ?session_file_path.
-        }}
-        OPTIONAL {{
-            ?subject nb:hasAge ?age.
-        }}
-        OPTIONAL {{
-            ?subject nb:hasSex ?sex.
-        }}
-        OPTIONAL {{
-            ?subject nb:hasDiagnosis ?diagnosis.
-        }}
-        OPTIONAL {{
-            ?subject nb:isSubjectGroup ?subject_group.
-        }}
-        OPTIONAL {{
-            ?subject nb:hasAssessment ?assessment.
-        }}
-        {{
-            SELECT ?subject (count(distinct ?session) as ?num_sessions)
-            WHERE {{
-                ?subject a nb:Subject;
-                    nb:hasSession ?session.
-                ?session nb:hasAcquisition/nb:hasContrastType ?image_modal.
-                {session_level_filters}
-            }} GROUP BY ?subject
-        }}
-        {subject_level_filters}
-    }}
-    """
-
-    # The query defined above will return all subject-level attributes from the graph. If RETURN_AGG variable has been set to true,
-    # wrap query in an aggregating statement so data returned from graph include only attributes needed for dataset-level aggregate metadata.
-    if return_agg:
-        query_string = f"""
-        SELECT ?dataset_uuid ?dataset_name ?dataset_portal_uri ?sub_id ?session_file_path ?image_modal WHERE {{\n
-        {query_string}
-        \n}} GROUP BY ?dataset_uuid ?dataset_name ?dataset_portal_uri ?sub_id ?session_file_path ?image_modal
-        """
-
-    return "\n".join([DEFAULT_CONTEXT, query_string])
-
-
-def create_terms_query(data_element_URI: str) -> str:
-    """
-    Creates a SPARQL query using a simple query template to retrieve term URLs for a given data element.
-
-    Parameters
-    ----------
-    data_element_URI : str
-        The URI of the data element for which to retrieve the URIs of all connected terms.
-
-    Returns
-    -------
-    str
-        The SPARQL query.
-
-    Examples
-    --------
-    create_terms_query("nb:Assessment")
-    """
-
-    query_string = f"""
-    SELECT DISTINCT ?termURL
-    WHERE {{
-        ?termURL a {data_element_URI}.
-    }}
-    """
-
-    return "\n".join([DEFAULT_CONTEXT, query_string])
+# def create_query(
+#     return_agg: bool,
+#     age: Optional[tuple] = (None, None),
+#     sex: Optional[str] = None,
+#     diagnosis: Optional[str] = None,
+#     is_control: Optional[bool] = None,
+#     min_num_sessions: Optional[int] = None,
+#     assessment: Optional[str] = None,
+#     image_modal: Optional[str] = None,
+# ) -> str:
+#     """
+#     Creates a neurobagel API query using a query template and filters it using the input parameters.
+
+#     Parameters
+#     ----------
+#     return_agg : bool
+#         Whether to return only aggregate query results (and not subject-level attributes besides file paths).
+#     age : tuple, optional
+#         Minimum and maximum age of subject, by default (None, None).
+#     sex : str, optional
+#         Subject sex, by default None.
+#     diagnosis : str, optional
+#         Subject diagnosis, by default None.
+#     is_control : bool, optional
+#         Whether or not subject is a control, by default None.
+#     min_num_sessions : int, optional
+#         Subject minimum number of imaging sessions, by default None.
+#     assessment : str, optional
+#         Non-imaging assessment completed by subjects, by default None.
+#     image_modal : str, optional
+#         Imaging modality of subject scans, by default None.

+#     Returns
+#     -------
+#     str
+#         The Neurobagel API query string.
+#     """
+#     query_string = "?"
+#     return f""
diff --git a/app/main.py b/app/main.py
index 58643a4..20db039 100644
--- a/app/main.py
+++ b/app/main.py
@@ -1,50 +1,33 @@
 """Main app."""
 
-import os
-import warnings
-
 import uvicorn
 from fastapi import FastAPI
-from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import ORJSONResponse
 
-from .api import utility as util
 from .api.routers import query
 
 app = FastAPI(default_response_class=ORJSONResponse)
 
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=util.parse_origins_as_list(util.ALLOWED_ORIGINS.val),
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-
-
-@app.on_event("startup")
-async def auth_check():
-    """Checks whether username and password environment variables are set."""
-    if (
-        # TODO: Check if this error is still raised when variables are empty strings
-        os.environ.get(util.GRAPH_USERNAME.name) is None
-        or os.environ.get(util.GRAPH_PASSWORD.name) is None
-    ):
-        raise RuntimeError(
-            f"The application was launched but could not find the {util.GRAPH_USERNAME.name} and / or {util.GRAPH_PASSWORD.name} environment variables."
-        )
-
-
-@app.on_event("startup")
-async def allowed_origins_check():
-    """Raises warning if allowed origins environment variable has not been set or is an empty string."""
-    if os.environ.get(util.ALLOWED_ORIGINS.name, "") == "":
-        warnings.warn(
-            f"The API was launched without providing any values for the {util.ALLOWED_ORIGINS.name} environment variable. "
-            "This means that the API will only be accessible from the same origin it is hosted from: https://developer.mozilla.org/en-US/docs/Web/Security/Same-origin_policy. "
-            f"If you want to access the API from tools hosted at other origins such as the Neurobagel query tool, explicitly set the value of {util.ALLOWED_ORIGINS.name} to the origin(s) of these tools (e.g. http://localhost:3000). "
-            "Multiple allowed origins should be separated with spaces in a single string enclosed in quotes. "
-        )
+# TODO: bring this back when we spin up the query tool
+# app.add_middleware(
+#     CORSMiddleware,
+#     allow_origins=util.parse_origins_as_list(util.ALLOWED_ORIGINS.val),
+#     allow_credentials=True,
+#     allow_methods=["*"],
+#     allow_headers=["*"],
+# )
+
+
+# @app.on_event("startup")
+# async def allowed_origins_check():
+#     """Raises warning if allowed origins environment variable has not been set or is an empty string."""
+#     if os.environ.get(util.ALLOWED_ORIGINS.name, "") == "":
+#         warnings.warn(
+#             f"The API was launched without providing any values for the {util.ALLOWED_ORIGINS.name} environment variable. "
+#             "This means that the API will only be accessible from the same origin it is hosted from: https://developer.mozilla.org/en-US/docs/Web/Security/Same-origin_policy. "
+#             f"If you want to access the API from tools hosted at other origins such as the Neurobagel query tool, explicitly set the value of {util.ALLOWED_ORIGINS.name} to the origin(s) of these tools (e.g. http://localhost:3000). "
+#             "Multiple allowed origins should be separated with spaces in a single string enclosed in quotes. "
+#         )
 
 app.include_router(query.router)
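The federation loop above calls each node with the synchronous httpx API, so total latency is the sum over all nodes. A hedged sketch of a concurrent fan-out with httpx.AsyncClient and asyncio.gather, reusing the node list and timeout from the code above:

    import asyncio

    import httpx

    NEUROBAGEL_NODES = [
        "http://206.12.99.17:8888/query/",
        "http://206.12.89.194:8000/query/",
    ]

    async def federate(params: dict) -> list:
        # Fire all node requests at once and collect the responses in order.
        async with httpx.AsyncClient(timeout=30.0) as client:
            responses = await asyncio.gather(
                *(client.get(url=node_url, params=params) for node_url in NEUROBAGEL_NODES)
            )
        results = []
        for response in responses:
            response.raise_for_status()
            results.append(response.json())
        return results

    print(asyncio.run(federate({"min_age": 18})))
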
" - ) +# TODO: bring this back when we spin up the query tool +# app.add_middleware( +# CORSMiddleware, +# allow_origins=util.parse_origins_as_list(util.ALLOWED_ORIGINS.val), +# allow_credentials=True, +# allow_methods=["*"], +# allow_headers=["*"], +# ) + + +# @app.on_event("startup") +# async def allowed_origins_check(): +# """Raises warning if allowed origins environment variable has not been set or is an empty string.""" +# if os.environ.get(util.ALLOWED_ORIGINS.name, "") == "": +# warnings.warn( +# f"The API was launched without providing any values for the {util.ALLOWED_ORIGINS.name} environment variable. " +# "This means that the API will only be accessible from the same origin it is hosted from: https://developer.mozilla.org/en-US/docs/Web/Security/Same-origin_policy. " +# f"If you want to access the API from tools hosted at other origins such as the Neurobagel query tool, explicitly set the value of {util.ALLOWED_ORIGINS.name} to the origin(s) of these tools (e.g. http://localhost:3000). " +# "Multiple allowed origins should be separated with spaces in a single string enclosed in quotes. " +# ) app.include_router(query.router) From 10a3b8248ad5d5ddbbbed140033fcd08f0588f0e Mon Sep 17 00:00:00 2001 From: Arman Jahanpour <77515879+rmanaem@users.noreply.github.com> Date: Thu, 28 Sep 2023 16:55:16 -0400 Subject: [PATCH 3/7] [MNT] Cleaned up the existing code (#3) * Changed router prefix and tag to `query` * Cleaned the good stuff Co-authored-by: Sebastian Urchs Co-authored-by: Alyssa Dai * Updated docstring for `get` function of `crud.py` Co-authored-by: Alyssa Dai --------- Co-authored-by: Sebastian Urchs Co-authored-by: Alyssa Dai Co-authored-by: Alyssa Dai --- app/api/crud.py | 26 +-------- app/api/models.py | 2 - app/api/routers/query.py | 5 +- app/api/utility.py | 110 +-------------------------------------- app/main.py | 28 +++------- 5 files changed, 12 insertions(+), 159 deletions(-) diff --git a/app/api/crud.py b/app/api/crud.py index 40dcaac..c8a4c06 100644 --- a/app/api/crud.py +++ b/app/api/crud.py @@ -5,26 +5,6 @@ from . import utility as util -# Order that dataset and subject-level attributes should appear in the API JSON response. -# This order is defined explicitly because when graph-returned results are transformed to a dataframe, -# the default order of columns may be different than the order that variables are given in the SPARQL SELECT state -ATTRIBUTES_ORDER = [ - "sub_id", - "num_sessions", - "session_id", - "session_file_path", - "age", - "sex", - "diagnosis", - "subject_group", - "assessment", - "image_modal", - "dataset_name", - "dataset_uuid", - "dataset_portal_uri", -] - - async def get( min_age: float, max_age: float, @@ -36,7 +16,7 @@ async def get( image_modal: str, ): """ - Makes a POST request to Stardog API using httpx where the payload is a SPARQL query generated by the create_query function. + Makes GET requests to one or more Neurobagel node APIs using httpx where the parameters are Neurobagel query parameters. 
diff --git a/app/api/models.py b/app/api/models.py
index 99658cd..1c24396 100644
--- a/app/api/models.py
+++ b/app/api/models.py
@@ -1,8 +1,6 @@
 """Data models."""
 from pydantic import BaseModel
 
-CONTROLLED_TERM_REGEX = r"^[a-zA-Z]+[:]\S+$"
-
 
 class QueryModel(BaseModel):
     """Data model and dependency for API that stores the query parameters to be accepted and validated."""
diff --git a/app/api/routers/query.py b/app/api/routers/query.py
index abb826c..e4fad74 100644
--- a/app/api/routers/query.py
+++ b/app/api/routers/query.py
@@ -5,10 +5,7 @@
 from .. import crud
 from ..models import QueryModel
 
-router = APIRouter(prefix="/federate", tags=["federation"])
-
-
-# TODO: update to change the logic once crud is modified
+router = APIRouter(prefix="/query", tags=["query"])
 
 
 @router.get("/")
diff --git a/app/api/utility.py b/app/api/utility.py
index f33f1c2..63058e7 100644
--- a/app/api/utility.py
+++ b/app/api/utility.py
@@ -1,115 +1,7 @@
-"""Constants for Stardog graph connection and utility functions for writing the SPARQL query."""
-
-import os
-from collections import namedtuple
+"""Constants for federation."""
 
 # Neurobagel nodes
 NEUROBAGEL_NODES = [
     "http://206.12.99.17:8888/query/",
     "http://206.12.89.194:8000/query/",
 ]
-
-# Request constants
-EnvVar = namedtuple("EnvVar", ["name", "val"])
-
-ALLOWED_ORIGINS = EnvVar(
-    "NB_API_ALLOWED_ORIGINS", os.environ.get("NB_API_ALLOWED_ORIGINS", "")
-)
-
-GRAPH_USERNAME = EnvVar(
-    "NB_GRAPH_USERNAME", os.environ.get("NB_GRAPH_USERNAME")
-)
-GRAPH_PASSWORD = EnvVar(
-    "NB_GRAPH_PASSWORD", os.environ.get("NB_GRAPH_PASSWORD")
-)
-GRAPH_ADDRESS = EnvVar(
-    "NB_GRAPH_ADDRESS", os.environ.get("NB_GRAPH_ADDRESS", "206.12.99.17")
-)
-GRAPH_DB = EnvVar(
-    "NB_GRAPH_DB", os.environ.get("NB_GRAPH_DB", "test_data/query")
-)
-GRAPH_PORT = EnvVar("NB_GRAPH_PORT", os.environ.get("NB_GRAPH_PORT", 5820))
-# TODO: Environment variables can't be parsed as bool so this is a workaround but isn't ideal.
-# Another option is to switch this to a command-line argument, but that would require changing the
-# Dockerfile also since Uvicorn can't accept custom command-line args.
-RETURN_AGG = EnvVar(
-    "NB_RETURN_AGG", os.environ.get("NB_RETURN_AGG", "True").lower() == "true"
-)
-
-QUERY_URL = f"http://{GRAPH_ADDRESS.val}:{GRAPH_PORT.val}/{GRAPH_DB.val}"
-QUERY_HEADER = {
-    "Content-Type": "application/sparql-query",
-    "Accept": "application/sparql-results+json",
-}
-
-# SPARQL query context
-DEFAULT_CONTEXT = """
-PREFIX cogatlas: <https://www.cognitiveatlas.org/task/id/>
-PREFIX nb: <http://neurobagel.org/vocab/>
-PREFIX nbg: <http://neurobagel.org/graph/>
-PREFIX ncit: <http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#>
-PREFIX nidm: <http://purl.org/nidash/nidm#>
-PREFIX snomed: <http://purl.bioontology.org/ontology/SNOMEDCT/>
-"""
-
-# Store domains in named tuples
-Domain = namedtuple("Domain", ["var", "pred"])
-# Core domains
-AGE = Domain("age", "nb:hasAge")
-SEX = Domain("sex", "nb:hasSex")
-DIAGNOSIS = Domain("diagnosis", "nb:hasDiagnosis")
-IS_CONTROL = Domain("subject_group", "nb:isSubjectGroup")
-ASSESSMENT = Domain("assessment", "nb:hasAssessment")
-IMAGE_MODAL = Domain("image_modal", "nb:hasContrastType")
-PROJECT = Domain("project", "nb:hasSamples")
-
-
-CATEGORICAL_DOMAINS = [SEX, DIAGNOSIS, IMAGE_MODAL, ASSESSMENT]
-
-IS_CONTROL_TERM = "ncit:C94342"
-
-
-def parse_origins_as_list(allowed_origins: str) -> list:
-    """Returns user-defined allowed origins as a list."""
-    return list(allowed_origins.split(" "))
-
-
-# def create_query(
-#     return_agg: bool,
-#     age: Optional[tuple] = (None, None),
-#     sex: Optional[str] = None,
-#     diagnosis: Optional[str] = None,
-#     is_control: Optional[bool] = None,
-#     min_num_sessions: Optional[int] = None,
-#     assessment: Optional[str] = None,
-#     image_modal: Optional[str] = None,
-# ) -> str:
-#     """
-#     Creates a neurobagel API query using a query template and filters it using the input parameters.
-
-#     Parameters
-#     ----------
-#     return_agg : bool
-#         Whether to return only aggregate query results (and not subject-level attributes besides file paths).
-#     age : tuple, optional
-#         Minimum and maximum age of subject, by default (None, None).
-#     sex : str, optional
-#         Subject sex, by default None.
-#     diagnosis : str, optional
-#         Subject diagnosis, by default None.
-#     is_control : bool, optional
-#         Whether or not subject is a control, by default None.
-#     min_num_sessions : int, optional
-#         Subject minimum number of imaging sessions, by default None.
-#     assessment : str, optional
-#         Non-imaging assessment completed by subjects, by default None.
-#     image_modal : str, optional
-#         Imaging modality of subject scans, by default None.

-#     Returns
-#     -------
-#     str
-#         The Neurobagel API query string.
-#     """
-#     query_string = "?"
-#     return f""
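Worth noting what the slimmed-down model and utility modules give up: with the constr patterns and root validators gone, nothing at the federation layer rejects malformed input any longer, and that burden falls on the individual node APIs. A quick sketch of the new permissiveness, assuming the package is importable as app.api.models:

    from app.api.models import QueryModel  # assumes the app package is on the path

    # Both construct without complaint now; validation happens downstream, if at all.
    print(QueryModel(min_age=-5.0))
    print(QueryModel(sex="not-a-controlled-term"))
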
diff --git a/app/main.py b/app/main.py
index 20db039..04f71dc 100644
--- a/app/main.py
+++ b/app/main.py
@@ -3,31 +3,19 @@
 import uvicorn
 from fastapi import FastAPI
 from fastapi.responses import ORJSONResponse
+from fastapi.middleware.cors import CORSMiddleware
 
 from .api.routers import query
 
 app = FastAPI(default_response_class=ORJSONResponse)
 
-# TODO: bring this back when we spin up the query tool
-# app.add_middleware(
-#     CORSMiddleware,
-#     allow_origins=util.parse_origins_as_list(util.ALLOWED_ORIGINS.val),
-#     allow_credentials=True,
-#     allow_methods=["*"],
-#     allow_headers=["*"],
-# )
-
-
-# @app.on_event("startup")
-# async def allowed_origins_check():
-#     """Raises warning if allowed origins environment variable has not been set or is an empty string."""
-#     if os.environ.get(util.ALLOWED_ORIGINS.name, "") == "":
-#         warnings.warn(
-#             f"The API was launched without providing any values for the {util.ALLOWED_ORIGINS.name} environment variable. "
-#             "This means that the API will only be accessible from the same origin it is hosted from: https://developer.mozilla.org/en-US/docs/Web/Security/Same-origin_policy. "
-#             f"If you want to access the API from tools hosted at other origins such as the Neurobagel query tool, explicitly set the value of {util.ALLOWED_ORIGINS.name} to the origin(s) of these tools (e.g. http://localhost:3000). "
-#             "Multiple allowed origins should be separated with spaces in a single string enclosed in quotes. "
-#         )
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
 
 app.include_router(query.router)
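allow_origins=["*"] together with allow_credentials=True is the most permissive CORS posture; if that needs tightening later, the environment-driven origin list from PATCH 1 can be restored without new machinery. A hedged sketch reusing the NB_API_ALLOWED_ORIGINS convention from the earlier utility module:

    import os

    from fastapi import FastAPI
    from fastapi.middleware.cors import CORSMiddleware

    app = FastAPI()

    # Space-separated origins, e.g. "http://localhost:3000 https://query.example.org".
    allowed_origins = os.environ.get("NB_API_ALLOWED_ORIGINS", "").split(" ")

    app.add_middleware(
        CORSMiddleware,
        allow_origins=allowed_origins,
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )
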
" -# f"If you want to access the API from tools hosted at other origins such as the Neurobagel query tool, explicitly set the value of {util.ALLOWED_ORIGINS.name} to the origin(s) of these tools (e.g. http://localhost:3000). " -# "Multiple allowed origins should be separated with spaces in a single string enclosed in quotes. " -# ) +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) app.include_router(query.router) From 1c38dcbd2440e77624e6d5f4131684a57a31c985 Mon Sep 17 00:00:00 2001 From: rmanaem Date: Thu, 28 Sep 2023 17:12:12 -0400 Subject: [PATCH 4/7] Added DOCKERFILE --- Dockerfile | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 Dockerfile diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..b8a17d6 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,14 @@ +FROM python:3.10 + +WORKDIR /usr/src/ + +COPY ./requirements.txt /usr/src/app/requirements.txt + +RUN pip install --no-cache-dir --upgrade -r /usr/src/app/requirements.txt + +COPY ./app /usr/src/app + +# NB_API_PORT, representing the port on which the API will be exposed, +# is an environment variable that will always have a default value of 8000 when building the image +# but can be overridden when running the container. +ENTRYPOINT uvicorn app.main:app --proxy-headers --host 0.0.0.0 --port ${NB_API_PORT:-8000} From 2902cccb6b1eb111cc20c729bf6dbd46898edff5 Mon Sep 17 00:00:00 2001 From: rmanaem Date: Wed, 4 Oct 2023 13:45:30 -0400 Subject: [PATCH 5/7] Updated to point to database --- app/api/utility.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/api/utility.py b/app/api/utility.py index 63058e7..357f4b8 100644 --- a/app/api/utility.py +++ b/app/api/utility.py @@ -2,6 +2,6 @@ # Neurobagel nodes NEUROBAGEL_NODES = [ - "http://206.12.99.17:8888/query/", + "http://206.12.99.17:8000/query/", "http://206.12.89.194:8000/query/", ] From 6e8b9d2ce7249ffd819d98c64d8bc7b9286cfa87 Mon Sep 17 00:00:00 2001 From: Arman Jahanpour <77515879+rmanaem@users.noreply.github.com> Date: Fri, 6 Oct 2023 13:20:23 -0400 Subject: [PATCH 6/7] [ENH] Turned NEUROBAGEL_NODES to an env var (#6) --- app/api/utility.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/app/api/utility.py b/app/api/utility.py index 357f4b8..efe1aa9 100644 --- a/app/api/utility.py +++ b/app/api/utility.py @@ -1,7 +1,6 @@ """Constants for federation.""" +import os + # Neurobagel nodes -NEUROBAGEL_NODES = [ - "http://206.12.99.17:8000/query/", - "http://206.12.89.194:8000/query/", -] +NEUROBAGEL_NODES = os.environ.get("NB_NODES", ["http://206.12.99.17:8000/query/"]) \ No newline at end of file From 6468cecc8f0a7ecc5545763dabd896301dee45e7 Mon Sep 17 00:00:00 2001 From: Arman Jahanpour <77515879+rmanaem@users.noreply.github.com> Date: Tue, 10 Oct 2023 15:19:02 -0400 Subject: [PATCH 7/7] [CI] Set up workflows (#7) * Turned NEUROBAGEL_NODES to an env var * Added dependabot.yaml config file * Added codespell.yaml workflow file * Added lint.yaml workflow file * Added build_docker.yaml workflow file --- .github/dependabot.yaml | 12 ++++++++++ .github/workflows/build_docker.yaml | 32 ++++++++++++++++++++++++++ .github/workflows/codespell.yaml | 19 ++++++++++++++++ .github/workflows/lint.yaml | 35 +++++++++++++++++++++++++++++ 4 files changed, 98 insertions(+) create mode 100644 .github/dependabot.yaml create mode 100644 .github/workflows/build_docker.yaml create mode 100644 .github/workflows/codespell.yaml create mode 100644 
From 6468cecc8f0a7ecc5545763dabd896301dee45e7 Mon Sep 17 00:00:00 2001
From: Arman Jahanpour <77515879+rmanaem@users.noreply.github.com>
Date: Tue, 10 Oct 2023 15:19:02 -0400
Subject: [PATCH 7/7] [CI] Set up workflows (#7)

* Turned NEUROBAGEL_NODES to an env var

* Added dependabot.yaml config file

* Added codespell.yaml workflow file

* Added lint.yaml workflow file

* Added build_docker.yaml workflow file

---
 .github/dependabot.yaml             | 12 ++++++++++
 .github/workflows/build_docker.yaml | 32 ++++++++++++++++++++++++++
 .github/workflows/codespell.yaml    | 19 ++++++++++++++++
 .github/workflows/lint.yaml         | 35 +++++++++++++++++++++++++++++
 4 files changed, 98 insertions(+)
 create mode 100644 .github/dependabot.yaml
 create mode 100644 .github/workflows/build_docker.yaml
 create mode 100644 .github/workflows/codespell.yaml
 create mode 100644 .github/workflows/lint.yaml

diff --git a/.github/dependabot.yaml b/.github/dependabot.yaml
new file mode 100644
index 0000000..c1f7a9d
--- /dev/null
+++ b/.github/dependabot.yaml
@@ -0,0 +1,12 @@
+# Documentation
+# https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
+version: 2
+updates:
+  - package-ecosystem: 'github-actions'
+    directory: '/'
+    schedule:
+      interval: 'weekly'
+    labels:
+      - "_bot"
+      - "maint:dependency"
+      - "type:maintenance"
diff --git a/.github/workflows/build_docker.yaml b/.github/workflows/build_docker.yaml
new file mode 100644
index 0000000..63511cd
--- /dev/null
+++ b/.github/workflows/build_docker.yaml
@@ -0,0 +1,32 @@
+name: build docker
+
+on:
+  push:
+    branches:
+      - main
+  workflow_dispatch:
+
+jobs:
+  build-docker:
+    runs-on: ubuntu-latest
+    steps:
+      -
+        name: Checkout
+        uses: actions/checkout@v4
+      -
+        name: Login to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+      -
+        name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      -
+        name: Build and push
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          file: ./Dockerfile
+          push: true
+          tags: ${{ secrets.DOCKERHUB_USERNAME }}/federation_api:latest
\ No newline at end of file
diff --git a/.github/workflows/codespell.yaml b/.github/workflows/codespell.yaml
new file mode 100644
index 0000000..3bda702
--- /dev/null
+++ b/.github/workflows/codespell.yaml
@@ -0,0 +1,19 @@
+---
+name: Codespell
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+jobs:
+  codespell:
+    name: Check for spelling errors
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Codespell
+        uses: codespell-project/actions-codespell@v2
diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml
new file mode 100644
index 0000000..ab395dc
--- /dev/null
+++ b/.github/workflows/lint.yaml
@@ -0,0 +1,35 @@
+name: lint
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+  workflow_dispatch:
+
+jobs:
+  lint:
+
+    runs-on: ubuntu-latest
+
+    steps:
+
+    - uses: actions/checkout@v4
+
+    - name: Set up Python 3.10
+      uses: actions/setup-python@v4
+      with:
+        python-version: "3.10"
+
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install flake8
+        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+
+    - name: Lint with flake8
+      run: |
+        # stop the build if there are Python syntax errors or undefined names
+        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics --per-file-ignores=./app/api/models.py:F722
+        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
+        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
\ No newline at end of file