diff --git a/docker-compose.yml b/docker-compose.yml index d5be4666..fa73ee07 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -56,6 +56,8 @@ services: - "8000:8000" volumes: - ./data/ingest:/data/ingest + # Use a TTY so colors are preserved in the log output + tty: true web: image: ghcr.io/microbiomedata/nmdc-server/client:main diff --git a/nmdc_server/app.py b/nmdc_server/app.py index ffa216be..67e77477 100644 --- a/nmdc_server/app.py +++ b/nmdc_server/app.py @@ -14,6 +14,7 @@ from nmdc_server import __version__, api, auth, errors from nmdc_server.config import settings +from nmdc_server.database import after_cursor_execute, before_cursor_execute, listen from nmdc_server.static_files import generate_submission_schema_files, initialize_static_directory @@ -35,6 +36,12 @@ def attach_sentry(app: FastAPI): def create_app(env: typing.Mapping[str, str]) -> FastAPI: + if settings.print_sql: + from sqlalchemy.engine import Engine + + listen(Engine, "before_cursor_execute", before_cursor_execute) + listen(Engine, "after_cursor_execute", after_cursor_execute) + def generate_and_mount_static_files(): static_path = initialize_static_directory(remove_existing=True) generate_submission_schema_files(directory=static_path) diff --git a/nmdc_server/cli.py b/nmdc_server/cli.py index dde3c609..d1d5180e 100644 --- a/nmdc_server/cli.py +++ b/nmdc_server/cli.py @@ -238,6 +238,10 @@ def shell(print_sql: bool, script: Optional[Path]): exec_lines.append("settings.print_sql = True") print("SQL debugging is ON") + exec_lines.append("import os") + exec_lines.append("from nmdc_server.app import create_app") + exec_lines.append("app = create_app(env=os.environ.copy())") + c = Config() c.InteractiveShellApp.exec_lines = exec_lines c.InteractiveShellApp.extensions = ["autoreload"] diff --git a/nmdc_server/database.py b/nmdc_server/database.py index 33156d4a..250284b8 100644 --- a/nmdc_server/database.py +++ b/nmdc_server/database.py @@ -1,4 +1,13 @@ +import sys +import time +import traceback + +import click +import sqlparse from debug_toolbar.panels.sqlalchemy import SQLAlchemyPanel as BasePanel +from pygments import highlight +from pygments.formatters import TerminalFormatter +from pygments.lexers import SqlLexer from sqlalchemy import create_engine from sqlalchemy.event import listen from sqlalchemy.ext.declarative import declarative_base @@ -9,9 +18,63 @@ from nmdc_server.multiomics import MultiomicsValue from nmdc_server.utils import json_serializer + +def pretty_format_sql(sql, params=None): + """ + Interpolate SQL with params and add formatting and syntax highlighting. + """ + if params and isinstance(params, dict): + sql = sql % {k: repr(v) for k, v in params.items()} + elif params: + raise Exception("Failed to format SQL for debug logging") + + sql = sqlparse.format( + sql, + reindent=True, + keyword_case="upper", + wrap_after=80, + strip_whitespace=True, + ) + + return highlight(sql, SqlLexer(), TerminalFormatter()) + + +def before_cursor_execute(conn, cursor, statement, parameters, context, executemany): + conn.info.setdefault("query_start_time", []).append(time.time()) + + +def after_cursor_execute(conn, cursor, statement, parameters, context, executemany): + total = time.time() - conn.info["query_start_time"].pop() + + stack = traceback.extract_stack() + + caller = None + for frame in reversed(stack[:-1]): # skip the current frame + # ignore frames that come from sqlalchemy directly + if "sqlalchemy" not in frame.filename: + caller = frame + break + + if not executemany: + parameters = [parameters] + + for parameter_set in parameters: + formatted_sql = pretty_format_sql(statement, parameter_set) + click.echo(formatted_sql, file=sys.stderr, nl=False) + + if caller: + click.secho( + f"Source: {caller.filename}:{caller.lineno}", + fg="yellow", + file=sys.stderr, + ) + # This is technically the execution time for all of the queries in this commit. So for bulk + # operations, this will be the total execution time for the entire operation. + click.secho("Execution time: {:.3f}s\n".format(total), fg="red", bold=True, file=sys.stderr) + + _engine_kwargs = { "json_serializer": json_serializer, - "echo": settings.print_sql, "pool_size": settings.db_pool_size, "max_overflow": settings.db_pool_max_overflow, } diff --git a/pyproject.toml b/pyproject.toml index 5d15e4cd..5e7fc3ab 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -150,6 +150,7 @@ dependencies = [ "sparqlslurper==0.5.1", "sparqlwrapper==2.0.0", "sqlalchemy<2", + "sqlparse==0.5.3", "stack-data==0.6.3", "starlette==0.38.6", "tomli==2.0.2",