Skip to content

Commit

Permalink
Merge branch 'main' into lvmvis_experiments
Browse files Browse the repository at this point in the history
  • Loading branch information
ivan-katkov committed Nov 6, 2024
2 parents f65ff58 + b496410 commit 9be909e
Show file tree
Hide file tree
Showing 18 changed files with 594 additions and 233 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ Change Log
* Adds endpoints for listing cartons and programs
* Adds endpoints for target search by carton and program
* Adds ``db_reset`` option to avoid closing and resetting the database connection
* Updates main, cone, and carton/program search to use new ``has_been_observed`` column; default is True.
* Updates the ``append_pipes`` query to return the new ``has_been_observed`` column
* Includes parent catalog associations in the ``/target/catalogs`` endpoint (#37).
* Adds endpoint to get parent catalog data for a given target (#38).

0.1.0 (10-24-2023)
------------------
Expand Down
4 changes: 2 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ RUN git config --global credential.helper 'store --file=/root/.git-credentials'
# Install poetry and project dependencies
RUN pip install poetry && \
poetry config virtualenvs.create false && \
poetry install --no-root && \
poetry install -E solara --no-root && \
rm -rf ~/.cache

# Remove credentials after use
Expand All @@ -59,7 +59,7 @@ FROM dep-stage as dev-stage

# Copy the main project files over and install
COPY ./ ./
RUN poetry install --only main
RUN poetry install -E solara --only main

# Create dir for socket and logs
RUN mkdir -p /tmp/webapp
Expand Down
4 changes: 2 additions & 2 deletions Dockerfile.dev
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,15 @@ RUN apt-get update && \
# Install poetry and project dependencies
RUN pip install poetry && \
poetry config virtualenvs.create false && \
poetry install --no-root && \
poetry install -E solara --no-root && \
rm -rf ~/.cache

# Stage 2: Development stage for the project
FROM dep-stage as dev-stage

# Copy the main project files over and install
COPY ./ ./
RUN poetry install --only main
RUN poetry install -E solara --only main

# setting environment variables
# these can be manually overridden
Expand Down
26 changes: 0 additions & 26 deletions build.py

This file was deleted.

41 changes: 0 additions & 41 deletions create_setup.py

This file was deleted.

202 changes: 125 additions & 77 deletions poetry.lock

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ packages = [
{ include = "valis", from = "python" }
]
include = ["python/valis/etc/*", "cextern/**/*", "LICENSE.md"]
build = "build.py"

[tool.poetry.scripts]
valis = "valis.cli:main"
Expand All @@ -46,10 +45,11 @@ httpx = "^0.24.0"
astroquery = "^0.4.6"
pandas = "^1.5.3"
SQLAlchemy = "^1.4.35"
sdssdb = "^0.12.0"
sdssdb = "^0.13.1"
deepmerge = "^1.1.1"
fuzzy-types = "^0.1.3"
sdss-solara = {git = "https://github.com/sdss/sdss_solara.git", rev = "main", optional = true}
markdown = "^3.7"

[tool.poetry.dev-dependencies]
ipython = ">=7.11.0"
Expand Down
25 changes: 16 additions & 9 deletions python/valis/db/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@
# -*- coding: utf-8 -*-
#

import asyncio
from contextvars import ContextVar

import peewee
from fastapi import HTTPException
from fastapi import HTTPException, Depends
from sdssdb.peewee.sdss5db import database as pdb
from sdssdb.sqlalchemy.sdss5db import database as sdb

Expand All @@ -18,10 +19,14 @@
db_state = ContextVar("db_state", default=db_state_default.copy())


def reset_db_state():
""" Sub-depdency for get_db that resets the context connection state """
pdb._state._state.set(db_state_default.copy())
pdb._state.reset()
async def reset_db_state():
    """ Sub-dependency for get_db that resets the context connection state

    When the ``db_reset`` setting is enabled, restore the peewee connection
    state ContextVar to a fresh copy of the default state so each request
    starts with a clean connection context.
    """

    # Imported locally to avoid a circular import with valis.main at module load.
    from valis.main import settings

    # Nothing to do unless the deployment opted in to per-request resets.
    if not settings.db_reset:
        return

    pdb._state._state.set(db_state_default.copy())
    pdb._state.reset()


class PeeweeConnectionState(peewee._ConnectionState):
Expand Down Expand Up @@ -64,16 +69,18 @@ def connect_db(db, orm: str = 'peewee'):

return db

def get_pw_db():
async def get_pw_db(db_state=Depends(reset_db_state)):
""" Dependency to connect a database with peewee """

from valis.main import settings

# connect to the db, yield None since we don't need the db in peewee
if settings.db_reset:
reset_db_state()
db = connect_db(pdb, orm='peewee')
else:
async with asyncio.Lock():
db = connect_db(pdb, orm='peewee')

# connect to the db, yield None since we don't need the db in peewee
db = connect_db(pdb, orm='peewee')
try:
yield db
finally:
Expand Down
88 changes: 78 additions & 10 deletions python/valis/db/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import datetime
import math
from typing import Optional, Annotated, Any, TypeVar
from pydantic import ConfigDict, BaseModel, Field, BeforeValidator
from pydantic import ConfigDict, BaseModel, Field, BeforeValidator, FieldSerializationInfo, field_serializer, field_validator, FieldValidationInfo
from enum import Enum


Expand Down Expand Up @@ -54,6 +54,11 @@ class SDSSidStackedBase(PeeweeBase):
catalogid21: Optional[int] = Field(None, description='the version 21 catalog id')
catalogid25: Optional[int] = Field(None, description='the version 25 catalog id')
catalogid31: Optional[int] = Field(None, description='the version 31 catalog id')
last_updated: datetime.date = Field(None, description='the date the sdss_id row was last updated', exclude=True)

@field_serializer('last_updated')
def serialize_dt(self, date: datetime.date) -> str:
return date.isoformat()


class SDSSidFlatBase(PeeweeBase):
Expand All @@ -67,20 +72,32 @@ class SDSSidFlatBase(PeeweeBase):
n_associated: int = Field(..., description='The total number of sdss_ids associated with that catalogid.')
ra_catalogid: Optional[float] = Field(None, description='Right Ascension, in degrees, specific to the catalogid')
dec_catalogid: Optional[float] = Field(None, description='Declination, in degrees, specific to the catalogid')

rank: int = Field(..., description='Ranking when catalogid paired to multiple sdss_id, with rank 1 as priority.')

class SDSSidPipesBase(PeeweeBase):
""" Pydantic response model for the Peewee vizdb.SDSSidToPipes ORM """

sdss_id: int = Field(..., description='the SDSS identifier')
in_boss: bool = Field(..., description='Flag if target is in the BHM reductions', examples=[False])
in_apogee: bool = Field(..., description='Flag if target is in the MWM reductions', examples=[False])
in_bvs: bool = Field(..., description='Flag if target is in the boss component of the Astra reductions', examples=[False], exclude=True)
in_astra: bool = Field(..., description='Flag if the target is in the Astra reductions', examples=[False])

has_been_observed: Optional[bool] = Field(False, validate_default=True, description='Flag if target has been observed or not', examples=[False])
release: Optional[str] = Field(None, description='the Astra release field, either sdss5 or dr17')
obs: Optional[str] = Field(None, description='the observatory the observation is from')
mjd: Optional[int] = Field(None, description='the MJD of the data reduction')

@field_validator('has_been_observed')
@classmethod
def is_observed(cls, v: str, info: FieldValidationInfo) -> str:
""" validator for when has_been_observed was not available in table """
if not v:
return info.data['in_boss'] or info.data['in_apogee'] or info.data['in_astra']
return v

class SDSSModel(SDSSidStackedBase, SDSSidPipesBase):
""" Main Pydantic response for SDSS id plus Pipes flags """
pass
distance: Optional[float] = Field(None, description='Separation distance between input target and cone search results, in degrees')


class BossSpectrum(PeeweeBase):
Expand All @@ -94,12 +111,13 @@ class BossSpectrum(PeeweeBase):
survey: str = None
firstcarton: str = None
objtype: str = None
specobjid: int = None
specobjid: Optional[int] = None

class AstraSource(PeeweeBase):
""" Pydantic response model for the MWM Astra source metadata """
sdss_id: int = None
sdss4_apogee_id: Optional[int] = None
catalogid: int = None
sdss4_apogee_id: Optional[str] = None
gaia_dr2_source_id: Optional[int] = None
gaia_dr3_source_id: Optional[int] = None
tic_v8_id: Optional[int] = None
Expand All @@ -115,8 +133,8 @@ class AstraSource(PeeweeBase):
sdss4_apogee_extra_target_flags: int = None
ra: float = None
dec: float = None
n_boss_visits: int = None
n_apogee_visits: int = None
n_boss_visits: Optional[int] = None
n_apogee_visits: Optional[int] = None
l: float = None
b: float = None
ebv: float = None
Expand Down Expand Up @@ -184,9 +202,30 @@ class CatalogModel(PeeweeBase):
parallax: Optional[float] = None


class ParentCatalogModel(PeeweeBase):
"""Pydantic model for parent catalog information """

sdss_id: Annotated[int, Field(description='The sdss_id associated with the parent catalogue data')]
catalogid: Annotated[int, Field(description='The catalogid associated with the parent catalogue data')]

# This model is usually instantiated with a dictionary of all the parent
# catalogue columns so we allow extra fields.
model_config = ConfigDict(extra='allow')


class CatalogResponse(CatalogModel, SDSSidFlatBase):
""" Response model for source catalog and sdss_id information """
pass

parent_catalogs: dict[str, Any] = Field(..., description='The parent catalog associations for a given catalogid')

    @field_serializer('parent_catalogs')
    def serialize_parent_catalogs(v: dict[str, Any], info: FieldSerializationInfo) -> dict[str, Any]:
        """ Serialize the parent catalogs, excluding None values and trimming strings."""
        # NOTE(review): defined without ``self`` and without ``@staticmethod`` —
        # confirm pydantic binds the first positional parameter to the field
        # value here, not to the model instance.
        # NOTE(review): ``v`` is reused inside the comprehension for each dict
        # value, shadowing the outer ``v`` (the whole dict) — works, but worth
        # renaming for readability.
        if info.exclude_none:
            # Honor exclude_none serialization: drop None entries and strip
            # surrounding whitespace from string values in one pass.
            return {k: v.strip() if isinstance(v, str) else v for k, v in v.items() if v is not None}
        else:
            # Default serialization: pass the dict through unchanged.
            return v


class CartonModel(PeeweeBase):
Expand Down Expand Up @@ -235,4 +274,33 @@ class MapperName(str, Enum):
"""Mapper names"""
MWM: str = 'MWM'
BHM: str = 'BHM'
LVM: str = 'LVM'
LVM: str = 'LVM'


def gen_misc_models():
    """ Generate metadata info for miscellaneous return columns

    This is a hack to handle cases where we return columns in search
    results that are not part of the original pipelines database ORM
    but we want to have the same metadata like "display_name" and
    "description". For example, the "distance" column returned by cone
    searches, as part of the SDSSModel response. This fakes the metadata
    framework so the front-end can use the same code. See usage for the
    info/database endpoint in "convert_metadata".

    Fakes a "misc" database schema and "misctab" table to hold these
    pseudo-columns.
    """
    # Each entry maps "<ModelName>.<field>" to the extra metadata that the
    # database ORM would normally provide for a real column.
    params = [{'SDSSModel.distance': {'display_name': 'Distance [deg]',
                                      'unit': 'degree', 'sql_type': 'float'}}]

    for entry in params:
        qualified_name, meta = entry.popitem()
        model_name, column_name = qualified_name.split('.')
        # Look up the pydantic model by name and pull the field's metadata,
        # so the "description" stays in sync with the response model.
        field_info = globals()[model_name].model_fields[column_name]

        yield {"pk": 0, "schema": "miscdb", "table_name": "misctab",
               "column_name": column_name,
               "display_name": meta["display_name"],
               "description": field_info.description,
               "unit": meta["unit"], "sql_type": meta["sql_type"]}
Loading

0 comments on commit 9be909e

Please sign in to comment.