Skip to content

Commit

Permalink
Fix loggers and db initialization when using multiple Gunicorn workers (
Browse files Browse the repository at this point in the history
#477)

### Description

- disable default Gunicorn logger handlers to use our custom loggers
- remove `--preload` flag which break our async Queue logger handlers
- allow to prevent db init on Hyperion startup using an env variable
- add a Gunicorn on_starting hook to init the db

### Checklist

- [ ] Created tests which fail without the change (if possible)
- [ ] All tests passing
- [x] Extended the documentation, if necessary

---------

Co-authored-by: Julien <[email protected]>
  • Loading branch information
armanddidierjean and julien4215 authored Aug 6, 2024
1 parent 92095fb commit 959ac7d
Show file tree
Hide file tree
Showing 7 changed files with 191 additions and 38 deletions.
4 changes: 4 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@ FROM tiangolo/uvicorn-gunicorn-fastapi:python3.11
# Image running several instances of uvicorn in parallel with gunicorn, listens on port 80
# See https://github.com/tiangolo/uvicorn-gunicorn-fastapi-docker

# Gunicorn config file must be named `gunicorn_conf.py` to be used by `uvicorn-gunicorn-fastapi`
# See https://github.com/tiangolo/uvicorn-gunicorn-docker?tab=readme-ov-file#gunicorn_conf
COPY ./gunicorn.conf.py /app/gunicorn_conf.py

COPY ./alembic.ini /app/alembic.ini
COPY ./migrations /app/migrations

Expand Down
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -200,3 +200,13 @@ To enable the service:
1. Go to [Google cloud, IAM and administration, Service account](https://console.cloud.google.com/iam-admin/serviceaccounts) and add a new Service Account with Messaging API capabilities.
2. Choose _Manage keys_ and create a new JSON key.
3. Rename the file `firebase.json` and add it at Hyperion root

---

## Running Hyperion with Gunicorn

For production we encourage to use Gunicorn to run and manage multiple Uvicorn workers. You can use our [docker image](./Dockerfile) and [docker-compose file](./docker-compose.yaml) files to run Hyperion with Gunicorn. See [Gunicorn with Uvicorn](https://fastapi.tiangolo.com/deployment/server-workers/#gunicorn-with-uvicorn-workers) FastAPI documentation.

Do not use gunicorn `--preload` flag. It initialise a first Hyperion instance then fork it to create workers. This is not compatible with the way we handle loggers in their own thread.

You should use our [Gunicorn configuration file](./gunicorn_conf.py) to ensure that database initialization and migrations are only run once.
84 changes: 57 additions & 27 deletions app/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from app.core.config import Settings
from app.core.groups.groups_type import GroupType
from app.core.log import LogConfig
from app.dependencies import get_db_engine, get_redis_client, get_settings
from app.dependencies import get_redis_client
from app.modules.module_list import module_list
from app.types.exceptions import ContentHTTPException
from app.types.sqlalchemy import Base
Expand Down Expand Up @@ -105,7 +105,11 @@ def run_alembic_upgrade(connection: Connection) -> None:
alembic_command.upgrade(alembic_cfg, "head")


def update_db_tables(engine: Engine, drop_db: bool = False) -> None:
def update_db_tables(
sync_engine: Engine,
hyperion_error_logger: logging.Logger,
drop_db: bool = False,
) -> None:
"""
If the database is not initialized, create the tables and stamp the database with the latest revision.
Otherwise, run the alembic upgrade command to upgrade the database to the latest version (`head`).
Expand All @@ -115,11 +119,9 @@ def update_db_tables(engine: Engine, drop_db: bool = False) -> None:
This method requires a synchronous engine
"""

hyperion_error_logger = logging.getLogger("hyperion.error")

try:
# We have an Engine, we want to acquire a Connection
with engine.begin() as conn:
with sync_engine.begin() as conn:
if drop_db:
# All tables should be dropped, including the alembic_version table
# or Hyperion will think that the database is up to date and will not initialize it
Expand Down Expand Up @@ -163,13 +165,14 @@ def update_db_tables(engine: Engine, drop_db: bool = False) -> None:
raise


def initialize_groups(engine: Engine) -> None:
def initialize_groups(
sync_engine: Engine,
hyperion_error_logger: logging.Logger,
) -> None:
"""Add the necessary groups for account types"""

hyperion_error_logger = logging.getLogger("hyperion.error")

hyperion_error_logger.info("Startup: Adding new groups to the database")
with Session(engine) as db:
with Session(sync_engine) as db:
for group_type in GroupType:
exists = initialization.get_group_by_id_sync(group_id=group_type, db=db)
# We don't want to recreate the groups if they already exist
Expand All @@ -188,13 +191,14 @@ def initialize_groups(engine: Engine) -> None:
)


def initialize_module_visibility(engine: Engine) -> None:
def initialize_module_visibility(
sync_engine: Engine,
hyperion_error_logger: logging.Logger,
) -> None:
"""Add the default module visibilities for Titan"""

hyperion_error_logger = logging.getLogger("hyperion.error")

with Session(engine) as db:
# Is run to create default module visibilies or when the table is empty
with Session(sync_engine) as db:
# Is run to create default module visibilities or when the table is empty
haveBeenInitialized = (
len(initialization.get_all_module_visibility_membership_sync(db)) > 0
)
Expand Down Expand Up @@ -238,6 +242,37 @@ def use_route_path_as_operation_ids(app: FastAPI) -> None:
route.operation_id = method.lower() + route.path.replace("/", "_")


def init_db(
settings: Settings,
hyperion_error_logger: logging.Logger,
drop_db: bool = False,
) -> None:
"""
Init the database by creating the tables and adding the necessary groups
The method will use a synchronous engine to create the tables and add the groups
"""
# Initialize the sync engine
sync_engine = initialization.get_sync_db_engine(settings=settings)

# Update database tables
update_db_tables(
sync_engine=sync_engine,
hyperion_error_logger=hyperion_error_logger,
drop_db=drop_db,
)

# Initialize database tables
initialize_groups(
sync_engine=sync_engine,
hyperion_error_logger=hyperion_error_logger,
)
initialize_module_visibility(
sync_engine=sync_engine,
hyperion_error_logger=hyperion_error_logger,
)


# We wrap the application in a function to be able to pass the settings and drop_db parameters
# The drop_db parameter is used to drop the database tables before creating them again
def get_application(settings: Settings, drop_db: bool = False) -> FastAPI:
Expand Down Expand Up @@ -294,18 +329,14 @@ async def lifespan(app: FastAPI) -> AsyncGenerator:
calypsso = get_calypsso_app()
app.mount("/calypsso", calypsso, "Calypsso")

# Initialize database connection
app.dependency_overrides.get(get_db_engine, get_db_engine)(
settings=settings,
) # Initialize the async engine
sync_engine = initialization.get_sync_db_engine(settings=settings)

# Update database tables
update_db_tables(sync_engine, drop_db)

# Initialize database tables
initialize_groups(sync_engine)
initialize_module_visibility(sync_engine)
if settings.HYPERION_INIT_DB:
init_db(
settings=settings,
hyperion_error_logger=hyperion_error_logger,
drop_db=drop_db,
)
else:
hyperion_error_logger.info("Database initialization skipped")

# Initialize Redis
if not app.dependency_overrides.get(get_redis_client, get_redis_client)(
Expand Down Expand Up @@ -342,7 +373,6 @@ async def logging_middleware(
port = request.client.port
client_address = f"{ip_address}:{port}"

settings: Settings = app.dependency_overrides.get(get_settings, get_settings)()
redis_client: redis.Redis | Literal[False] | None = (
app.dependency_overrides.get(
get_redis_client,
Expand Down
28 changes: 22 additions & 6 deletions app/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class Settings(BaseSettings):
1. An environment variable
2. The dotenv .env file
See [Pydantic Settings documentation](https://pydantic-docs.helpmanual.io/usage/settings/#dotenv-env-support) for more information.
See [Pydantic Settings documentation](https://docs.pydantic.dev/latest/concepts/pydantic_settings/#dotenv-env-support) for more information.
See [FastAPI settings](https://fastapi.tiangolo.com/advanced/settings/) article for best practices with settings.
To access these settings, the `get_settings` dependency should be used.
Expand Down Expand Up @@ -118,11 +118,6 @@ class Settings(BaseSettings):
# By default, only production's records are logged
LOG_DEBUG_MESSAGES: bool | None

# Hyperion follows Semantic Versioning
# https://semver.org/
HYPERION_VERSION: str = "2.6.0"
MINIMAL_TITAN_VERSION_CODE: int = 113

# Origins for the CORS middleware. `["http://localhost"]` can be used for development.
# See https://fastapi.tiangolo.com/tutorial/cors/
# It should begin with 'http://' or 'https:// and should never end with a '/'
Expand Down Expand Up @@ -168,10 +163,24 @@ class Settings(BaseSettings):
# NOTE: AUTH_CLIENTS property should never be used in the code. To get an auth client, use `KNOWN_AUTH_CLIENTS`
AUTH_CLIENTS: list[tuple[str, str | None, list[str], str]]

#################################
# Hardcoded Hyperion parameters #
#################################

# Hyperion follows Semantic Versioning
# https://semver.org/
HYPERION_VERSION: str = "2.6.0"
MINIMAL_TITAN_VERSION_CODE: int = 113

######################################
# Automatically generated parameters #
######################################

# If Hyperion should initialize the database on startup
# This environment variable is set by the Gunicorn on_starting hook, to tell the workers to avoid initializing the database
# You don't want to set this variable manually
HYPERION_INIT_DB: bool = True

# The following properties can not be instantiated as class variables as them need to be computed using another property from the class,
# which won't be available before the .env file parsing.
# We thus decide to use the decorator `@property` to make these methods usable as properties and not functions: as properties: Settings.RSA_PRIVATE_KEY, Settings.RSA_PUBLIC_KEY and Settings.RSA_PUBLIC_JWK
Expand Down Expand Up @@ -321,3 +330,10 @@ def init_cached_property(self) -> "Settings":
self.RSA_PUBLIC_JWK # noqa

return self


def construct_prod_settings() -> Settings:
"""
Return the production settings
"""
return Settings(_env_file=".env")
9 changes: 6 additions & 3 deletions app/dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,17 @@ async def get_users(db: AsyncSession = Depends(get_db)):

from app.core import models_core, security
from app.core.auth import schemas_auth
from app.core.config import Settings
from app.core.config import Settings, construct_prod_settings
from app.core.groups.groups_type import GroupType, get_ecl_groups
from app.core.payment.payment_tool import PaymentTool
from app.core.users import cruds_users
from app.types.scopes_type import ScopeType
from app.utils.communication.notifications import NotificationManager, NotificationTool
from app.utils.redis import connect
from app.utils.tools import is_user_external, is_user_member_of_an_allowed_group
from app.utils.tools import (
is_user_external,
is_user_member_of_an_allowed_group,
)

# We could maybe use hyperion.security
hyperion_access_logger = logging.getLogger("hyperion.access")
Expand Down Expand Up @@ -131,7 +134,7 @@ def get_settings() -> Settings:
"""
# `lru_cache()` decorator is here to prevent the class to be instantiated multiple times.
# See https://fastapi.tiangolo.com/advanced/settings/#lru_cache-technical-details
return Settings(_env_file=".env")
return construct_prod_settings()


def get_redis_client(
Expand Down
2 changes: 0 additions & 2 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,6 @@ services:
condition: service_healthy
hyperion-redis:
condition: service_started
environment:
- GUNICORN_CMD_ARGS="--preload"
ports:
- 8000:80
volumes:
Expand Down
92 changes: 92 additions & 0 deletions gunicorn.conf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
import logging
import multiprocessing
import os

from app.app import init_db
from app.core.config import construct_prod_settings
from app.core.log import LogConfig

# This gunicorn configuration file is based on [`uvicorn-gunicorn-docker` image config file](https://github.com/tiangolo/uvicorn-gunicorn-docker/blob/master/docker-images/gunicorn_conf.py)
# It define an `on_starting` hook that instantiate the database and run migrations

# For usage with uvicorn-gunicorn-docker image see:
# https://github.com/tiangolo/uvicorn-gunicorn-docker?tab=readme-ov-file#gunicorn_conf


workers_per_core_str = os.getenv("WORKERS_PER_CORE", "1")
max_workers_str = os.getenv("MAX_WORKERS")
use_max_workers = None
if max_workers_str:
use_max_workers = int(max_workers_str)
web_concurrency_str = os.getenv("WEB_CONCURRENCY", None)

host = os.getenv("HOST", "0.0.0.0")
port = os.getenv("PORT", "80")
bind_env = os.getenv("BIND", None)
use_loglevel = os.getenv("LOG_LEVEL", "info")
if bind_env: # noqa: SIM108
use_bind = bind_env
else:
use_bind = f"{host}:{port}"

cores = multiprocessing.cpu_count()
workers_per_core = float(workers_per_core_str)
default_web_concurrency = workers_per_core * cores
if web_concurrency_str:
web_concurrency = int(web_concurrency_str)
assert web_concurrency > 0 # noqa: S101
else:
web_concurrency = max(int(default_web_concurrency), 2)
if use_max_workers:
web_concurrency = min(web_concurrency, use_max_workers)
accesslog_var = os.getenv("ACCESS_LOG", "-")
use_accesslog = accesslog_var or None
errorlog_var = os.getenv("ERROR_LOG", "-")
use_errorlog = errorlog_var or None
graceful_timeout_str = os.getenv("GRACEFUL_TIMEOUT", "120")
timeout_str = os.getenv("TIMEOUT", "120")
keepalive_str = os.getenv("KEEP_ALIVE", "5")

# Gunicorn config variables
loglevel = use_loglevel
workers = web_concurrency
bind = use_bind
errorlog = use_errorlog
worker_tmp_dir = "/dev/shm" # noqa: S108
accesslog = use_accesslog
graceful_timeout = int(graceful_timeout_str)
timeout = int(timeout_str)
keepalive = int(keepalive_str)


def on_starting(server) -> None:
"""
The hook is called just before the master process is initialized. We use it to instantiate the database and run migrations
An gunicorn.arbiter.Arbiter instance is passed as an argument.
See https://docs.gunicorn.org/en/stable/settings.html#on-starting
"""
# We call `construct_prod_settings()` and not the dependency `get_settings()` because:
# - we know we want to use the production settings
# - we will edit environment variables to avoid initializing the database and `get_settings()` is a cached function
settings = construct_prod_settings()

# We set an environment variable to tell workers to avoid initializing the database
# as we want to do it only once before workers are forked from the arbiter
os.environ["HYPERION_INIT_DB"] = "False"

# Initialize loggers
LogConfig().initialize_loggers(settings=settings)

hyperion_error_logger = logging.getLogger("hyperion.error")

hyperion_error_logger.warning(
"Starting Gunicorn server and initializing the database.",
)

init_db(
settings=settings,
hyperion_error_logger=hyperion_error_logger,
drop_db=False,
)

0 comments on commit 959ac7d

Please sign in to comment.