From b6e6f3fcc6737aa82238b9a87fefa9764da199a4 Mon Sep 17 00:00:00 2001 From: Robin Holzinger <127766570+RobinHolzingerQC@users.noreply.github.com> Date: Mon, 4 Dec 2023 13:55:38 +0100 Subject: [PATCH] Address issues of missing `bucket_name` in `s3fs` paths (#673) * pin typer and add defaults (api change) * Adjust S3 config w.r.t. to s3fs requirements * pin typer>=0.9,<1.0 * remove comment * revert cli changes * update docs * add S3_BUCKET_NAME env variable * pin typer in setup.cfg * fix? * test_cli env. variable fix * remove hard coded S3_BUCKET_NAME (use env) * try setting QUETZ_S3_BUCKET_NAME in ci * add debug statements to find where env variable is lost * remove commented out code --- .github/workflows/ci.yml | 5 ++++- .gitignore | 3 +++ docs/source/deploying/configuration.rst | 7 +++++-- quetz/config.py | 4 +++- quetz/pkgstores.py | 5 ++++- quetz/tests/test_cli.py | 5 ++++- quetz/tests/test_config.py | 1 - quetz/tests/test_pkg_stores.py | 2 +- 8 files changed, 24 insertions(+), 8 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 981f0d09..30b15d5e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -68,12 +68,12 @@ jobs: S3_SECRET_KEY: ${{ secrets.s3_secret_key }} S3_ENDPOINT: https://s3.sbg.cloud.ovh.net/ S3_REGION: sbg + S3_BUCKET_NAME: quetz run: | # install dev dependencies pip install -e .[all,dev] pip install redis rq pip install pytest-github-actions-annotate-failures - - name: Testing server shell: bash -l -eo pipefail {0} env: @@ -85,6 +85,7 @@ jobs: S3_SECRET_KEY: ${{ secrets.s3_secret_key }} S3_ENDPOINT: https://s3.sbg.cloud.ovh.net/ S3_REGION: sbg + S3_BUCKET_NAME: quetz run: | if [ "$TEST_DB_BACKEND" == "postgres" ]; then export QUETZ_TEST_DATABASE="postgresql://postgres:mysecretpassword@${POSTGRES_HOST}:${POSTGRES_PORT}/postgres" @@ -96,6 +97,7 @@ jobs: fi export QUETZ_IS_TEST=1 + pytest -v ./quetz/tests/ --cov-config=pyproject.toml --cov=. --cov-report=xml - name: Test the plugins @@ -109,6 +111,7 @@ jobs: S3_SECRET_KEY: ${{ secrets.s3_secret_key }} S3_ENDPOINT: https://s3.sbg.cloud.ovh.net/ S3_REGION: sbg + S3_BUCKET_NAME: quetz run: | if [ "$TEST_DB_BACKEND" == "postgres" ]; then export QUETZ_TEST_DATABASE="postgresql://postgres:mysecretpassword@${POSTGRES_HOST}:${POSTGRES_PORT}/postgres" diff --git a/.gitignore b/.gitignore index f73b38a0..d31e4850 100644 --- a/.gitignore +++ b/.gitignore @@ -23,3 +23,6 @@ test_quetz .env .envrc .venv + +# OS +.DS_Store diff --git a/docs/source/deploying/configuration.rst b/docs/source/deploying/configuration.rst index 97672de5..a39e23e5 100644 --- a/docs/source/deploying/configuration.rst +++ b/docs/source/deploying/configuration.rst @@ -156,16 +156,18 @@ Quetz can store package in object cloud storage compatible with S3 interface. To [s3] access_key = "AKIAIOSFODNN7EXAMPLE" secret_key = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY" - url = "https://..." + url = "https://s3.amazonaws.com" region = "" + bucket_name = "" bucket_prefix="..." bucket_suffix="..." :access key: :secret key: credentials to S3 account, if you use IAM roles, don't set them or set them to ``""`` -:url: set to the S3 endpoint of your provider (for AWS, you can skip it) +:url: set to the S3 endpoint (excluding bucket name) of your provider (for AWS, you can skip it) :region: region of the S3 instance +:bucket_name: name of the bucket to store the data in :bucket_prefix: :bucket_suffix: channel directories on S3 are created with the following semantics: ``{bucket_prefix}{channel_name}{bucket_suffix}`` @@ -276,5 +278,6 @@ Variable description values ``S3_SECRET_KEY`` secret key to s3 (used in tests) string ``S3_ENDPOINT`` s3 endpoint url string ``S3_REGION`` s3 region string +``S3_BUCKET_NAME`` s3 bucket name string ======================= ====================================== =========================== =================== diff --git a/quetz/config.py b/quetz/config.py index 82803877..ae2460d9 100644 --- a/quetz/config.py +++ b/quetz/config.py @@ -128,8 +128,9 @@ class Config: [ ConfigEntry("access_key", str, default=""), ConfigEntry("secret_key", str, default=""), - ConfigEntry("url", str, default=""), + ConfigEntry("url", str, default="https://s3.amazonaws.com"), ConfigEntry("region", str, default=""), + ConfigEntry("bucket_name", str, default=""), ConfigEntry("bucket_prefix", str, default=""), ConfigEntry("bucket_suffix", str, default=""), ], @@ -450,6 +451,7 @@ def get_package_store(self) -> pkgstores.PackageStore: 'secret': self.s3_secret_key, 'url': self.s3_url, 'region': self.s3_region, + 'bucket_name': self.s3_bucket_name, 'bucket_prefix': self.s3_bucket_prefix, 'bucket_suffix': self.s3_bucket_suffix, } diff --git a/quetz/pkgstores.py b/quetz/pkgstores.py index 5ce49497..8206c353 100644 --- a/quetz/pkgstores.py +++ b/quetz/pkgstores.py @@ -292,8 +292,11 @@ def __init__(self, config): client_kwargs = {} url = config.get('url') region = config.get("region") + self.bucket_name = config.get("bucket_name") if url: client_kwargs['endpoint_url'] = url + if not self.bucket_name: + raise ValueError("bucket_name is required in s3 configuration") if region: client_kwargs["region_name"] = region @@ -326,7 +329,7 @@ def _get_fs(self): raise ConfigError(f"{e} - check configured S3 credentials") def _bucket_map(self, name): - return f"{self.bucket_prefix}{name}{self.bucket_suffix}" + return f"{self.bucket_name}/{self.bucket_prefix}{name}{self.bucket_suffix}" def create_channel(self, name): """Create the bucket if one doesn't already exist diff --git a/quetz/tests/test_cli.py b/quetz/tests/test_cli.py index 8940cca2..5445f7c6 100644 --- a/quetz/tests/test_cli.py +++ b/quetz/tests/test_cli.py @@ -7,12 +7,12 @@ from unittest import mock from unittest.mock import MagicMock -import pytest import sqlalchemy as sa from alembic.script import ScriptDirectory from pytest_mock.plugin import MockerFixture from typer.testing import CliRunner +import pytest from quetz import cli from quetz.config import Config from quetz.db_models import Base, Identity, User @@ -554,9 +554,12 @@ def database_url_environment_variable(database_url) -> None: @pytest.fixture() def s3_environment_variable() -> None: os.environ["QUETZ_S3_ACCESS_KEY"] = "fake_key" + os.environ["QUETZ_S3_BUCKET_NAME"] = "fake_bucket" yield if "QUETZ_S3_ACCESS_KEY" in os.environ: del os.environ["QUETZ_S3_ACCESS_KEY"] + if "QUETZ_S3_BUCKET_NAME" in os.environ: + del os.environ["QUETZ_S3_BUCKET_NAME"] @pytest.fixture() diff --git a/quetz/tests/test_config.py b/quetz/tests/test_config.py index d6c72ad0..1f861c98 100644 --- a/quetz/tests/test_config.py +++ b/quetz/tests/test_config.py @@ -3,7 +3,6 @@ import tempfile import pytest - from quetz.config import Config, ConfigEntry, ConfigSection, configure_logger from quetz.dao import Dao from quetz.errors import ConfigError diff --git a/quetz/tests/test_pkg_stores.py b/quetz/tests/test_pkg_stores.py index 52f09784..48d41955 100644 --- a/quetz/tests/test_pkg_stores.py +++ b/quetz/tests/test_pkg_stores.py @@ -8,7 +8,6 @@ from pathlib import Path import pytest - from quetz.pkgstores import ( AzureBlobStore, GoogleCloudStorageStore, @@ -23,6 +22,7 @@ 'secret': os.environ.get("S3_SECRET_KEY"), 'url': os.environ.get("S3_ENDPOINT"), 'region': os.environ.get("S3_REGION"), + 'bucket_name': os.environ.get("S3_BUCKET_NAME"), 'bucket_prefix': "test", 'bucket_suffix': "", }