diff --git a/.env-devel b/.env-devel index f2e27bf1c32..a2fe30524ce 100644 --- a/.env-devel +++ b/.env-devel @@ -18,6 +18,12 @@ AGENT_VOLUMES_CLEANUP_S3_PROVIDER=MINIO API_SERVER_DEV_FEATURES_ENABLED=0 +AUTOSCALING_DASK=null +AUTOSCALING_EC2_ACCESS=null +AUTOSCALING_EC2_INSTANCES=null +AUTOSCALING_NODES_MONITORING=null +AUTOSCALING_POLL_INTERVAL=10 + BF_API_KEY=none BF_API_SECRET=none diff --git a/.pylintrc b/.pylintrc index bae6b832f9a..f3f789e9c63 100644 --- a/.pylintrc +++ b/.pylintrc @@ -243,7 +243,7 @@ contextmanager-decorators=contextlib.contextmanager # List of members which are set dynamically and missed by pylint inference # system, and so shouldn't trigger E1101 when accessed. Python regular # expressions are accepted. -generated-members= +generated-members=sh # Tells whether to warn about missing members when the owner of the attribute # is inferred to be None. diff --git a/packages/aws-library/requirements/_base.in b/packages/aws-library/requirements/_base.in index a0fd39eb41f..bff24c55562 100644 --- a/packages/aws-library/requirements/_base.in +++ b/packages/aws-library/requirements/_base.in @@ -10,3 +10,4 @@ aioboto3 aiocache pydantic[email] types-aiobotocore[ec2] +sh diff --git a/packages/aws-library/requirements/_base.txt b/packages/aws-library/requirements/_base.txt index 2e125c14ad8..9267ce51d19 100644 --- a/packages/aws-library/requirements/_base.txt +++ b/packages/aws-library/requirements/_base.txt @@ -154,6 +154,8 @@ rpds-py==0.12.0 # referencing s3transfer==0.7.0 # via boto3 +sh==2.0.6 + # via -r requirements/_base.in six==1.16.0 # via python-dateutil tenacity==8.2.3 diff --git a/packages/aws-library/requirements/ci.txt b/packages/aws-library/requirements/ci.txt index 604f51d486e..0672404f4ca 100644 --- a/packages/aws-library/requirements/ci.txt +++ b/packages/aws-library/requirements/ci.txt @@ -13,6 +13,7 @@ # installs this repo's packages ../pytest-simcore/ +../models-library/ ../service-library/ ../settings-library/ diff --git a/packages/aws-library/requirements/dev.txt b/packages/aws-library/requirements/dev.txt index b183acca92c..9017cb30880 100644 --- a/packages/aws-library/requirements/dev.txt +++ b/packages/aws-library/requirements/dev.txt @@ -13,6 +13,7 @@ # installs this repo's packages --editable ../pytest-simcore/ +--editable ../models-library/ --editable ../service-library/ --editable ../settings-library/ diff --git a/packages/aws-library/src/aws_library/ec2/models.py b/packages/aws-library/src/aws_library/ec2/models.py index d124763cebd..21b0f89b513 100644 --- a/packages/aws-library/src/aws_library/ec2/models.py +++ b/packages/aws-library/src/aws_library/ec2/models.py @@ -1,8 +1,19 @@ import datetime +import tempfile from dataclasses import dataclass -from typing import TypeAlias - -from pydantic import BaseModel, ByteSize, NonNegativeFloat, PositiveInt +from typing import Any, ClassVar, TypeAlias + +import sh +from models_library.docker import DockerGenericTag +from pydantic import ( + BaseModel, + ByteSize, + Extra, + Field, + NonNegativeFloat, + PositiveInt, + validator, +) from types_aiobotocore_ec2.literals import InstanceStateNameType, InstanceTypeType @@ -74,3 +85,88 @@ class EC2InstanceConfig: key_name: str security_group_ids: list[str] subnet_id: str + + +AMIIdStr: TypeAlias = str +CommandStr: TypeAlias = str + + +class EC2InstanceBootSpecific(BaseModel): + ami_id: AMIIdStr + custom_boot_scripts: list[CommandStr] = Field( + default_factory=list, + description="script(s) to run on EC2 instance startup (be careful!), " + "each entry is run one after the other using '&&' operator", + ) + pre_pull_images: list[DockerGenericTag] = Field( + default_factory=list, + description="a list of docker image/tags to pull on instance cold start", + ) + pre_pull_images_cron_interval: datetime.timedelta = Field( + default=datetime.timedelta(minutes=30), + description="time interval between pulls of images (minimum is 1 minute) " + "(default to seconds, or see https://pydantic-docs.helpmanual.io/usage/types/#datetime-types for string formating)", + ) + + class Config: + extra = Extra.forbid + schema_extra: ClassVar[dict[str, Any]] = { + "examples": [ + { + # just AMI + "ami_id": "ami-123456789abcdef", + }, + { + # AMI + scripts + "ami_id": "ami-123456789abcdef", + "custom_boot_scripts": ["ls -tlah", "echo blahblah"], + }, + { + # AMI + scripts + pre-pull + "ami_id": "ami-123456789abcdef", + "custom_boot_scripts": ["ls -tlah", "echo blahblah"], + "pre_pull_images": [ + "nginx:latest", + "itisfoundation/my-very-nice-service:latest", + "simcore/services/dynamic/another-nice-one:2.4.5", + "asd", + ], + }, + { + # AMI + pre-pull + "ami_id": "ami-123456789abcdef", + "pre_pull_images": [ + "nginx:latest", + "itisfoundation/my-very-nice-service:latest", + "simcore/services/dynamic/another-nice-one:2.4.5", + "asd", + ], + }, + { + # AMI + pre-pull + cron + "ami_id": "ami-123456789abcdef", + "pre_pull_images": [ + "nginx:latest", + "itisfoundation/my-very-nice-service:latest", + "simcore/services/dynamic/another-nice-one:2.4.5", + "asd", + ], + "pre_pull_images_cron_interval": "01:00:00", + }, + ] + } + + @validator("custom_boot_scripts") + @classmethod + def validate_bash_calls(cls, v): + try: + with tempfile.NamedTemporaryFile(mode="wt", delete=True) as temp_file: + temp_file.writelines(v) + temp_file.flush() + # NOTE: this will not capture runtime errors, but at least some syntax errors such as invalid quotes + sh.bash("-n", temp_file.name) + except sh.ErrorReturnCode as exc: + msg = f"Invalid bash call in custom_boot_scripts: {v}, Error: {exc.stderr}" + raise ValueError(msg) from exc + + return v diff --git a/packages/settings-library/src/settings_library/ec2.py b/packages/settings-library/src/settings_library/ec2.py index 710db9c6e4f..2cd7cf0b9a6 100644 --- a/packages/settings-library/src/settings_library/ec2.py +++ b/packages/settings-library/src/settings_library/ec2.py @@ -18,7 +18,7 @@ class Config(BaseCustomSettings.Config): "examples": [ { "EC2_ACCESS_KEY_ID": "my_access_key_id", - "EC2_ENDPOINT": "http://my_ec2_endpoint.com", + "EC2_ENDPOINT": "https://my_ec2_endpoint.com", "EC2_REGION_NAME": "us-east-1", "EC2_SECRET_ACCESS_KEY": "my_secret_access_key", } diff --git a/services/autoscaling/README.md b/services/autoscaling/README.md index 70785f9659f..cd2abf6bcb8 100644 --- a/services/autoscaling/README.md +++ b/services/autoscaling/README.md @@ -1,16 +1,13 @@ # autoscaling -[![image-size]](https://microbadger.com/images/itisfoundation/autoscaling. "More on itisfoundation/autoscaling.:staging-latest image") +Service to auto-scale swarm for both dynamic and computational services -[![image-badge]](https://microbadger.com/images/itisfoundation/autoscaling "More on Auto scaling service image in registry") -[![image-version]](https://microbadger.com/images/itisfoundation/autoscaling "More on Auto scaling service image in registry") -[![image-commit]](https://microbadger.com/images/itisfoundation/autoscaling "More on Auto scaling service image in registry") -Service to auto-scale swarm +## development - -[image-size]:https://img.shields.io/microbadger/image-size/itisfoundation/autoscaling./staging-latest.svg?label=autoscaling.&style=flat -[image-badge]:https://images.microbadger.com/badges/image/itisfoundation/autoscaling.svg -[image-version]https://images.microbadger.com/badges/version/itisfoundation/autoscaling.svg -[image-commit]:https://images.microbadger.com/badges/commit/itisfoundation/autoscaling.svg - +``` +make install-dev +make test-dev-unit + +# NOTE: there are manual tests that need access to AWS EC2 instances! +``` diff --git a/services/autoscaling/requirements/_base.txt b/services/autoscaling/requirements/_base.txt index bb189297c0e..621da3d81fc 100644 --- a/services/autoscaling/requirements/_base.txt +++ b/services/autoscaling/requirements/_base.txt @@ -7,6 +7,7 @@ aio-pika==9.3.0 # via # -c requirements/../../../packages/service-library/requirements/./_base.in + # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in # -r requirements/../../../packages/service-library/requirements/_base.in aioboto3==12.0.0 # via -r requirements/../../../packages/aws-library/requirements/_base.in @@ -21,18 +22,26 @@ aiocache==0.12.2 aiodebug==2.3.0 # via # -c requirements/../../../packages/service-library/requirements/./_base.in + # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in # -r requirements/../../../packages/service-library/requirements/_base.in aiodocker==0.21.0 # via # -c requirements/../../../packages/service-library/requirements/./_base.in + # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in # -r requirements/../../../packages/service-library/requirements/_base.in # -r requirements/_base.in aiofiles==23.2.1 # via # -c requirements/../../../packages/service-library/requirements/./_base.in + # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in # -r requirements/../../../packages/service-library/requirements/_base.in aiohttp==3.8.6 # via + # -c requirements/../../../packages/aws-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/aws-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/aws-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt @@ -58,6 +67,9 @@ anyio==4.0.0 arrow==1.3.0 # via # -c requirements/../../../packages/service-library/requirements/./_base.in + # -r requirements/../../../packages/aws-library/requirements/../../../packages/models-library/requirements/_base.in + # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/_base.in + # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in # -r requirements/../../../packages/models-library/requirements/_base.in # -r requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/_base.in # -r requirements/../../../packages/service-library/requirements/./../../../packages/models-library/requirements/_base.in @@ -82,6 +94,11 @@ botocore-stubs==1.31.77 # via types-aiobotocore certifi==2023.7.22 # via + # -c requirements/../../../packages/aws-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/aws-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/aws-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt @@ -125,6 +142,11 @@ exceptiongroup==1.1.3 # via anyio fastapi==0.99.1 # via + # -c requirements/../../../packages/aws-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/aws-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/aws-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt @@ -153,6 +175,11 @@ httpcore==0.18.0 # via httpx httpx==0.25.0 # via + # -c requirements/../../../packages/aws-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/aws-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/aws-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt @@ -176,6 +203,11 @@ importlib-metadata==6.8.0 # dask jinja2==3.1.2 # via + # -c requirements/../../../packages/aws-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/aws-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/aws-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt @@ -194,6 +226,8 @@ jmespath==1.0.1 # botocore jsonschema==4.19.2 # via + # -r requirements/../../../packages/aws-library/requirements/../../../packages/models-library/requirements/_base.in + # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/_base.in # -r requirements/../../../packages/models-library/requirements/_base.in # -r requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/_base.in # -r requirements/../../../packages/service-library/requirements/./../../../packages/models-library/requirements/_base.in @@ -222,6 +256,8 @@ multidict==6.0.4 # yarl orjson==3.9.10 # via + # -r requirements/../../../packages/aws-library/requirements/../../../packages/models-library/requirements/_base.in + # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/_base.in # -r requirements/../../../packages/models-library/requirements/_base.in # -r requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/_base.in # -r requirements/../../../packages/service-library/requirements/./../../../packages/models-library/requirements/_base.in @@ -243,6 +279,11 @@ psutil==5.9.5 # distributed pydantic==1.10.13 # via + # -c requirements/../../../packages/aws-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/aws-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/aws-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt @@ -254,6 +295,11 @@ pydantic==1.10.13 # -c requirements/../../../packages/service-library/requirements/./_base.in # -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../requirements/constraints.txt + # -r requirements/../../../packages/aws-library/requirements/../../../packages/models-library/requirements/_base.in + # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/_base.in + # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/_base.in + # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/aws-library/requirements/../../../packages/settings-library/requirements/_base.in # -r requirements/../../../packages/aws-library/requirements/_base.in # -r requirements/../../../packages/models-library/requirements/_base.in # -r requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/_base.in @@ -268,6 +314,7 @@ pygments==2.16.1 pyinstrument==4.6.0 # via # -c requirements/../../../packages/service-library/requirements/./_base.in + # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in # -r requirements/../../../packages/service-library/requirements/_base.in python-dateutil==2.8.2 # via @@ -275,6 +322,11 @@ python-dateutil==2.8.2 # botocore pyyaml==6.0.1 # via + # -c requirements/../../../packages/aws-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/aws-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/aws-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt @@ -287,11 +339,17 @@ pyyaml==6.0.1 # -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../requirements/constraints.txt # -c requirements/../../../services/dask-sidecar/requirements/_dask-distributed.txt + # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in # -r requirements/../../../packages/service-library/requirements/_base.in # dask # distributed redis==5.0.1 # via + # -c requirements/../../../packages/aws-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/aws-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/aws-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt @@ -303,15 +361,19 @@ redis==5.0.1 # -c requirements/../../../packages/service-library/requirements/./_base.in # -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../requirements/constraints.txt + # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in # -r requirements/../../../packages/service-library/requirements/_base.in referencing==0.29.3 # via + # -c requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/./constraints.txt # -c requirements/../../../packages/service-library/requirements/././constraints.txt # -c requirements/../../../packages/service-library/requirements/./constraints.txt # jsonschema # jsonschema-specifications rich==13.6.0 # via + # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/_base.in + # -r requirements/../../../packages/aws-library/requirements/../../../packages/settings-library/requirements/_base.in # -r requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/_base.in # -r requirements/../../../packages/service-library/requirements/./../../../packages/settings-library/requirements/_base.in # -r requirements/../../../packages/settings-library/requirements/_base.in @@ -321,6 +383,8 @@ rpds-py==0.10.6 # referencing s3transfer==0.7.0 # via boto3 +sh==2.0.6 + # via -r requirements/../../../packages/aws-library/requirements/_base.in six==1.16.0 # via python-dateutil sniffio==1.3.0 @@ -334,6 +398,11 @@ sortedcontainers==2.4.0 # distributed starlette==0.27.0 # via + # -c requirements/../../../packages/aws-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/aws-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/aws-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt @@ -352,11 +421,13 @@ tblib==2.0.0 tenacity==8.2.3 # via # -c requirements/../../../packages/service-library/requirements/./_base.in + # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in # -r requirements/../../../packages/service-library/requirements/_base.in toolz==0.12.0 # via # -c requirements/../../../packages/service-library/requirements/./_base.in # -c requirements/../../../services/dask-sidecar/requirements/_dask-distributed.txt + # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in # -r requirements/../../../packages/service-library/requirements/_base.in # dask # distributed @@ -368,9 +439,12 @@ tornado==6.3.3 tqdm==4.66.1 # via # -c requirements/../../../packages/service-library/requirements/./_base.in + # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in # -r requirements/../../../packages/service-library/requirements/_base.in typer==0.9.0 # via + # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/_base.in + # -r requirements/../../../packages/aws-library/requirements/../../../packages/settings-library/requirements/_base.in # -r requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/_base.in # -r requirements/../../../packages/service-library/requirements/./../../../packages/settings-library/requirements/_base.in # -r requirements/../../../packages/settings-library/requirements/_base.in @@ -394,6 +468,11 @@ typing-extensions==4.8.0 # uvicorn urllib3==1.26.16 # via + # -c requirements/../../../packages/aws-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/aws-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/aws-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt diff --git a/services/autoscaling/src/simcore_service_autoscaling/api/health.py b/services/autoscaling/src/simcore_service_autoscaling/api/health.py index 0091d6d877b..07c29f3a198 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/api/health.py +++ b/services/autoscaling/src/simcore_service_autoscaling/api/health.py @@ -5,6 +5,7 @@ """ import datetime +from typing import Annotated from fastapi import APIRouter, Depends, FastAPI from fastapi.responses import PlainTextResponse @@ -37,8 +38,7 @@ class _StatusGet(BaseModel): @router.get("/status", include_in_schema=True, response_model=_StatusGet) -async def get_status(app: FastAPI = Depends(get_app)) -> _StatusGet: - +async def get_status(app: Annotated[FastAPI, Depends(get_app)]) -> _StatusGet: return _StatusGet( rabbitmq=_ComponentStatus( is_enabled=bool(app.state.rabbitmq_client), diff --git a/services/autoscaling/src/simcore_service_autoscaling/core/settings.py b/services/autoscaling/src/simcore_service_autoscaling/core/settings.py index be378286fad..12ce642d348 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/core/settings.py +++ b/services/autoscaling/src/simcore_service_autoscaling/core/settings.py @@ -1,7 +1,8 @@ import datetime from functools import cached_property -from typing import cast +from typing import Any, ClassVar, Final, cast +from aws_library.ec2.models import EC2InstanceBootSpecific from fastapi import FastAPI from models_library.basic_types import ( BootModeEnum, @@ -9,7 +10,7 @@ LogLevel, VersionTag, ) -from models_library.docker import DockerGenericTag, DockerLabelKey +from models_library.docker import DockerLabelKey from pydantic import ( AnyUrl, Field, @@ -29,23 +30,31 @@ from .._meta import API_VERSION, API_VTAG, APP_NAME +AUTOSCALING_ENV_PREFIX: Final[str] = "AUTOSCALING_" + + +class AutoscalingEC2Settings(EC2Settings): + class Config(EC2Settings.Config): + env_prefix = AUTOSCALING_ENV_PREFIX + + schema_extra: ClassVar[dict[str, Any]] = { + "examples": [ + { + f"{AUTOSCALING_ENV_PREFIX}EC2_ACCESS_KEY_ID": "my_access_key_id", + f"{AUTOSCALING_ENV_PREFIX}EC2_ENDPOINT": "https://my_ec2_endpoint.com", + f"{AUTOSCALING_ENV_PREFIX}EC2_REGION_NAME": "us-east-1", + f"{AUTOSCALING_ENV_PREFIX}EC2_SECRET_ACCESS_KEY": "my_secret_access_key", + } + ], + } + class EC2InstancesSettings(BaseCustomSettings): - EC2_INSTANCES_ALLOWED_TYPES: list[str] = Field( + EC2_INSTANCES_ALLOWED_TYPES: dict[str, EC2InstanceBootSpecific] = Field( ..., - min_items=1, - unique_items=True, - description="Defines which EC2 instances are considered as candidates for new EC2 instance", - ) - EC2_INSTANCES_AMI_ID: str = Field( - ..., - min_length=1, - description="Defines the AMI (Amazon Machine Image) ID used to start a new EC2 instance", - ) - EC2_INSTANCES_CUSTOM_BOOT_SCRIPTS: list[str] = Field( - default_factory=list, - description="script(s) to run on EC2 instance startup (be careful!), each entry is run one after the other using '&&' operator", + description="Defines which EC2 instances are considered as candidates for new EC2 instance and their respective boot specific parameters", ) + EC2_INSTANCES_KEY_NAME: str = Field( ..., min_length=1, @@ -72,15 +81,7 @@ class EC2InstancesSettings(BaseCustomSettings): min_length=1, description="prefix used to name the EC2 instances created by this instance of autoscaling", ) - EC2_INSTANCES_PRE_PULL_IMAGES: list[DockerGenericTag] = Field( - default_factory=list, - description="a list of docker image/tags to pull on instance cold start", - ) - EC2_INSTANCES_PRE_PULL_IMAGES_CRON_INTERVAL: datetime.timedelta = Field( - default=datetime.timedelta(minutes=30), - description="time interval between pulls of images (minimum is 1 minute) " - "(default to seconds, or see https://pydantic-docs.helpmanual.io/usage/types/#datetime-types for string formating)", - ) + EC2_INSTANCES_SECURITY_GROUP_IDS: list[str] = Field( ..., min_items=1, @@ -112,11 +113,16 @@ def ensure_time_is_in_range(cls, value): @validator("EC2_INSTANCES_ALLOWED_TYPES") @classmethod - def check_valid_intance_names(cls, value): + def check_valid_instance_names( + cls, value: dict[str, EC2InstanceBootSpecific] + ) -> dict[str, EC2InstanceBootSpecific]: # NOTE: needed because of a flaw in BaseCustomSettings # issubclass raises TypeError if used on Aliases - parse_obj_as(tuple[InstanceTypeType, ...], value) - return value + if all(parse_obj_as(InstanceTypeType, key) for key in value): + return value + + msg = "Invalid instance type name" + raise ValueError(msg) class NodesMonitoringSettings(BaseCustomSettings): @@ -184,7 +190,9 @@ class ApplicationSettings(BaseCustomSettings, MixinLoggingSettings): description="Enables local development log format. WARNING: make sure it is disabled if you want to have structured logs!", ) - AUTOSCALING_EC2_ACCESS: EC2Settings | None = Field(auto_default_from_env=True) + AUTOSCALING_EC2_ACCESS: AutoscalingEC2Settings | None = Field( + auto_default_from_env=True + ) AUTOSCALING_EC2_INSTANCES: EC2InstancesSettings | None = Field( auto_default_from_env=True diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_core.py b/services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_core.py index 08cd815c5ad..1e0c3f28a9e 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_core.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_core.py @@ -184,9 +184,9 @@ async def sorted_allowed_instance_types(app: FastAPI) -> list[EC2InstanceType]: def _sort_according_to_allowed_types(instance_type: EC2InstanceType) -> int: assert app_settings.AUTOSCALING_EC2_INSTANCES # nosec - return app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_ALLOWED_TYPES.index( - f"{instance_type.name}" - ) + return list( + app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_ALLOWED_TYPES + ).index(f"{instance_type.name}") allowed_instance_types.sort(key=_sort_according_to_allowed_types) return allowed_instance_types @@ -452,15 +452,21 @@ async def _start_instances( assert app_settings.AUTOSCALING_EC2_INSTANCES # nosec instance_tags = auto_scaling_mode.get_ec2_tags(app) - instance_startup_script = await ec2_startup_script(app_settings) results = await asyncio.gather( *[ ec2_client.start_aws_instance( EC2InstanceConfig( type=instance_type, tags=instance_tags, - startup_script=instance_startup_script, - ami_id=app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_AMI_ID, + startup_script=await ec2_startup_script( + app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_ALLOWED_TYPES[ + instance_type.name + ], + app_settings, + ), + ami_id=app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_ALLOWED_TYPES[ + instance_type.name + ].ami_id, key_name=app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_KEY_NAME, security_group_ids=app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_SECURITY_GROUP_IDS, subnet_id=app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_SUBNET_ID, @@ -618,7 +624,7 @@ async def _find_terminateable_instances( _logger.info( "%s has still %ss before being terminateable", f"{instance.ec2_instance.id=}", - f"{(elapsed_time_since_drained - app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_TIME_BEFORE_TERMINATION).total_seconds()}", + f"{(app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_TIME_BEFORE_TERMINATION - elapsed_time_since_drained).total_seconds()}", ) if terminateable_nodes: diff --git a/services/autoscaling/src/simcore_service_autoscaling/utils/auto_scaling_core.py b/services/autoscaling/src/simcore_service_autoscaling/utils/auto_scaling_core.py index 106b68ea352..f0fe674e9db 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/utils/auto_scaling_core.py +++ b/services/autoscaling/src/simcore_service_autoscaling/utils/auto_scaling_core.py @@ -3,7 +3,12 @@ import re from typing import Final -from aws_library.ec2.models import EC2InstanceData, EC2InstanceType, Resources +from aws_library.ec2.models import ( + EC2InstanceBootSpecific, + EC2InstanceData, + EC2InstanceType, + Resources, +) from models_library.generated_models.docker_rest_api import Node from types_aiobotocore_ec2.literals import InstanceTypeType @@ -74,15 +79,14 @@ def _find_node_with_name(node: Node) -> bool: return associated_instances, non_associated_instances -async def ec2_startup_script(app_settings: ApplicationSettings) -> str: - assert app_settings.AUTOSCALING_EC2_INSTANCES # nosec - startup_commands = ( - app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_CUSTOM_BOOT_SCRIPTS.copy() - ) +async def ec2_startup_script( + ec2_boot_specific: EC2InstanceBootSpecific, app_settings: ApplicationSettings +) -> str: + startup_commands = ec2_boot_specific.custom_boot_scripts.copy() startup_commands.append(await utils_docker.get_docker_swarm_join_bash_command()) if app_settings.AUTOSCALING_REGISTRY: # noqa: SIM102 if pull_image_cmd := utils_docker.get_docker_pull_images_on_start_bash_command( - app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_PRE_PULL_IMAGES + ec2_boot_specific.pre_pull_images ): startup_commands.append( " && ".join( @@ -96,7 +100,7 @@ async def ec2_startup_script(app_settings: ApplicationSettings) -> str: ) startup_commands.append( utils_docker.get_docker_pull_images_crontab( - app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_PRE_PULL_IMAGES_CRON_INTERVAL + ec2_boot_specific.pre_pull_images_cron_interval ), ) diff --git a/services/autoscaling/tests/manual/.env-devel b/services/autoscaling/tests/manual/.env-devel index 80e54412ce4..a56c8ab125b 100644 --- a/services/autoscaling/tests/manual/.env-devel +++ b/services/autoscaling/tests/manual/.env-devel @@ -1,14 +1,18 @@ AUTOSCALING_DEBUG=true AUTOSCALING_LOGLEVEL=INFO AUTOSCALING_TASK_INTERVAL=30 -EC2_ACCESS_KEY_ID=XXXXXXXXXX -EC2_INSTANCES_ALLOWED_TYPES="[\"t2.micro\"]" -EC2_INSTANCES_AMI_ID=XXXXXXXXXX +AUTOSCALING_EC2_ACCESS_KEY_ID=XXXXXXXXXX +AUTOSCALING_EC2_SECRET_ACCESS_KEY=XXXXXXXXXX +AUTOSCALING_EC2_ENDPOINT=https://ec2.amazonaws.com +AUTOSCALING_EC2_REGION_NAME=us-east-1 +EC2_INSTANCES_MACHINES_BUFFER=0 +EC2_INSTANCES_MAX_INSTANCES=20 +EC2_INSTANCES_TIME_BEFORE_TERMINATION="00:03:00" +EC2_INSTANCES_ALLOWED_TYPES='{"t2.micro": {"ami_id": "XXXXXXXX", "custom_boot_scripts": ["whoami"], "pre_pull_images": ["ubuntu:latest"]}}' EC2_INSTANCES_KEY_NAME=XXXXXXXXXX +EC2_INSTANCES_NAME_PREFIX=testing-osparc-computational-cluster EC2_INSTANCES_SECURITY_GROUP_IDS="[\"XXXXXXXXXX\"]" EC2_INSTANCES_SUBNET_ID=XXXXXXXXXX -EC2_SECRET_ACCESS_KEY=XXXXXXXXXX -EC2_INSTANCES_NAME_PREFIX=testing-osparc-computational-cluster LOG_FORMAT_LOCAL_DEV_ENABLED=True # define the following to activate dynamic autoscaling # NODES_MONITORING_NEW_NODES_LABELS="[\"testing.autoscaled-node\"]" diff --git a/services/autoscaling/tests/manual/README.md b/services/autoscaling/tests/manual/README.md index e44e739d182..baa8c88aa94 100644 --- a/services/autoscaling/tests/manual/README.md +++ b/services/autoscaling/tests/manual/README.md @@ -8,7 +8,7 @@ The dynamic mode is used directly with docker swarm facilities. ### requirements 1. AWS EC2 access -2. a machine running in EC2 with docker installed and access to osparc-simcore repository +2. a machine running in EC2 with docker installed and access to osparc-simcore repository (for example t2.xlarge to have some computational power) ## computational mode @@ -92,6 +92,7 @@ make build-devel # this will build the autoscaling devel image cd osparc-simcore/services/autoscaling/tests/manual make .env # generate an initial .env file nano .env # edit .env and set the variables as needed +# in particular NODES_MONITORING_NEW_NODES_LABELS, NODES_MONITORING_NODE_LABELS, NODES_MONITORING_SERVICE_LABELS must be activated ``` 3. start autoscaling stack diff --git a/services/autoscaling/tests/unit/conftest.py b/services/autoscaling/tests/unit/conftest.py index 14f1132c531..c6a1dbe8191 100644 --- a/services/autoscaling/tests/unit/conftest.py +++ b/services/autoscaling/tests/unit/conftest.py @@ -6,6 +6,7 @@ import dataclasses import datetime import json +import random from collections.abc import AsyncIterator, Awaitable, Callable from copy import deepcopy from pathlib import Path @@ -19,8 +20,7 @@ import pytest import simcore_service_autoscaling from asgi_lifespan import LifespanManager -from aws_library.ec2.client import SimcoreEC2API -from aws_library.ec2.models import EC2InstanceData +from aws_library.ec2.models import EC2InstanceBootSpecific, EC2InstanceData from deepdiff import DeepDiff from faker import Faker from fakeredis.aioredis import FakeRedis @@ -41,7 +41,11 @@ from pytest_simcore.helpers.utils_host import get_localhost_ip from settings_library.rabbit import RabbitSettings from simcore_service_autoscaling.core.application import create_app -from simcore_service_autoscaling.core.settings import ApplicationSettings, EC2Settings +from simcore_service_autoscaling.core.settings import ( + AUTOSCALING_ENV_PREFIX, + ApplicationSettings, + EC2Settings, +) from simcore_service_autoscaling.models import Cluster, DaskTaskResources from simcore_service_autoscaling.modules.docker import AutoscalingDocker from tenacity import retry @@ -86,6 +90,19 @@ def ec2_instances() -> list[InstanceTypeType]: return ["t2.nano", "m5.12xlarge"] +@pytest.fixture +def mocked_ec2_server_envs( + mocked_ec2_server_settings: EC2Settings, + monkeypatch: pytest.MonkeyPatch, +) -> EnvVarsDict: + # NOTE: overrides the EC2Settings with what autoscaling expects + changed_envs: EnvVarsDict = { + f"{AUTOSCALING_ENV_PREFIX}{k}": v + for k, v in mocked_ec2_server_settings.dict().items() + } + return setenvs_from_dict(monkeypatch, changed_envs) + + @pytest.fixture def app_environment( mock_env_devel_environment: EnvVarsDict, @@ -94,18 +111,27 @@ def app_environment( ec2_instances: list[InstanceTypeType], ) -> EnvVarsDict: # SEE https://faker.readthedocs.io/en/master/providers/faker.providers.internet.html?highlight=internet#faker-providers-internet + envs = setenvs_from_dict( monkeypatch, { - "EC2_ACCESS_KEY_ID": faker.pystr(), - "EC2_SECRET_ACCESS_KEY": faker.pystr(), + "AUTOSCALING_EC2_ACCESS": "{}", + "AUTOSCALING_EC2_ACCESS_KEY_ID": faker.pystr(), + "AUTOSCALING_EC2_SECRET_ACCESS_KEY": faker.pystr(), + "AUTOSCALING_EC2_INSTANCES": "{}", "EC2_INSTANCES_KEY_NAME": faker.pystr(), "EC2_INSTANCES_SECURITY_GROUP_IDS": json.dumps( faker.pylist(allowed_types=(str,)) ), "EC2_INSTANCES_SUBNET_ID": faker.pystr(), - "EC2_INSTANCES_AMI_ID": faker.pystr(), - "EC2_INSTANCES_ALLOWED_TYPES": json.dumps(ec2_instances), + "EC2_INSTANCES_ALLOWED_TYPES": json.dumps( + { + ec2_type_name: random.choice( # noqa: S311 + EC2InstanceBootSpecific.Config.schema_extra["examples"] + ) + for ec2_type_name in ec2_instances + } + ), }, ) return mock_env_devel_environment | envs @@ -128,7 +154,13 @@ def mocked_ec2_instances_envs( "EC2_INSTANCES_SUBNET_ID": aws_subnet_id, "EC2_INSTANCES_AMI_ID": aws_ami_id, "EC2_INSTANCES_ALLOWED_TYPES": json.dumps( - aws_allowed_ec2_instance_type_names + { + ec2_type_name: random.choice( # noqa: S311 + EC2InstanceBootSpecific.Config.schema_extra["examples"] + ) + | {"ami_id": aws_ami_id} + for ec2_type_name in aws_allowed_ec2_instance_type_names + } ), }, ) @@ -155,6 +187,7 @@ def enabled_dynamic_mode( return app_environment | setenvs_from_dict( monkeypatch, { + "AUTOSCALING_NODES_MONITORING": "{}", "NODES_MONITORING_NODE_LABELS": json.dumps(["pytest.fake-node-label"]), "NODES_MONITORING_SERVICE_LABELS": json.dumps( ["pytest.fake-service-label"] @@ -173,6 +206,7 @@ def enabled_computational_mode( return app_environment | setenvs_from_dict( monkeypatch, { + "AUTOSCALING_DASK": "{}", "DASK_MONITORING_URL": faker.url(), "DASK_MONITORING_USER_NAME": faker.user_name(), "DASK_MONITORING_PASSWORD": faker.password(), @@ -181,16 +215,15 @@ def enabled_computational_mode( @pytest.fixture -def disabled_rabbitmq(app_environment: EnvVarsDict, monkeypatch: pytest.MonkeyPatch): - monkeypatch.delenv("RABBIT_HOST") - monkeypatch.delenv("RABBIT_USER") - monkeypatch.delenv("RABBIT_SECURE") - monkeypatch.delenv("RABBIT_PASSWORD") +def disabled_rabbitmq( + app_environment: EnvVarsDict, monkeypatch: pytest.MonkeyPatch +) -> None: + monkeypatch.setenv("AUTOSCALING_RABBITMQ", "null") @pytest.fixture -def disabled_ec2(app_environment: EnvVarsDict, monkeypatch: pytest.MonkeyPatch): - monkeypatch.delenv("EC2_ACCESS_KEY_ID") +def disabled_ec2(app_environment: EnvVarsDict, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("AUTOSCALING_EC2_ACCESS", "null") @pytest.fixture @@ -506,17 +539,6 @@ def aws_allowed_ec2_instance_type_names_env( return app_environment | setenvs_from_dict(monkeypatch, changed_envs) -@pytest.fixture -async def autoscaling_ec2( - app_environment: EnvVarsDict, -) -> AsyncIterator[SimcoreEC2API]: - settings = EC2Settings.create_from_envs() - ec2 = await SimcoreEC2API.create(settings) - assert ec2 - yield ec2 - await ec2.close() - - @pytest.fixture def host_cpu_count() -> int: return psutil.cpu_count() diff --git a/services/autoscaling/tests/unit/test_core_settings.py b/services/autoscaling/tests/unit/test_core_settings.py index 8576d5fdb35..1ccf0c2a574 100644 --- a/services/autoscaling/tests/unit/test_core_settings.py +++ b/services/autoscaling/tests/unit/test_core_settings.py @@ -6,9 +6,49 @@ import json import pytest +from faker import Faker from pydantic import ValidationError -from pytest_simcore.helpers.utils_envs import EnvVarsDict -from simcore_service_autoscaling.core.settings import ApplicationSettings +from pytest_simcore.helpers.utils_envs import EnvVarsDict, setenvs_from_dict +from simcore_service_autoscaling.core.settings import ( + ApplicationSettings, + EC2InstancesSettings, +) +from types_aiobotocore_ec2.literals import InstanceTypeType + + +def test_ec2_instances_settings(app_environment: EnvVarsDict): + settings = EC2InstancesSettings.create_from_envs() + assert isinstance(settings.EC2_INSTANCES_ALLOWED_TYPES, dict) + + +@pytest.fixture +def instance_type_with_invalid_boot_script( + mock_env_devel_environment: EnvVarsDict, + monkeypatch: pytest.MonkeyPatch, + faker: Faker, + ec2_instances: list[InstanceTypeType], +) -> EnvVarsDict: + return setenvs_from_dict( + monkeypatch, + { + "EC2_INSTANCES_ALLOWED_TYPES": json.dumps( + { + ec2_type_name: { + "ami_id": faker.pystr(), + "custom_boot_scripts": ['ls"'], + } + for ec2_type_name in ec2_instances + } + ), + }, + ) + + +def test_ec2_instances_settings_with_invalid_custom_script_raises( + app_environment: EnvVarsDict, instance_type_with_invalid_boot_script: EnvVarsDict +): + with pytest.raises(ValidationError): + EC2InstancesSettings.create_from_envs() def test_settings(app_environment: EnvVarsDict): @@ -48,10 +88,10 @@ def test_defining_both_computational_and_dynamic_modes_is_invalid_and_raises( ApplicationSettings.create_from_envs() -def test_invalid_EC2_INSTANCES_TIME_BEFORE_TERMINATION( +def test_invalid_EC2_INSTANCES_TIME_BEFORE_TERMINATION( # noqa: N802 app_environment: EnvVarsDict, monkeypatch: pytest.MonkeyPatch ): - monkeypatch.setenv("EC2_INSTANCES_TIME_BEFORE_TERMINATION", "1:05:00") + setenvs_from_dict(monkeypatch, {"EC2_INSTANCES_TIME_BEFORE_TERMINATION": "1:05:00"}) settings = ApplicationSettings.create_from_envs() assert settings.AUTOSCALING_EC2_INSTANCES assert settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_TIME_BEFORE_TERMINATION @@ -59,8 +99,9 @@ def test_invalid_EC2_INSTANCES_TIME_BEFORE_TERMINATION( datetime.timedelta(minutes=59) == settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_TIME_BEFORE_TERMINATION ) - - monkeypatch.setenv("EC2_INSTANCES_TIME_BEFORE_TERMINATION", "-1:05:00") + setenvs_from_dict( + monkeypatch, {"EC2_INSTANCES_TIME_BEFORE_TERMINATION": "-1:05:00"} + ) settings = ApplicationSettings.create_from_envs() assert settings.AUTOSCALING_EC2_INSTANCES assert ( @@ -69,31 +110,47 @@ def test_invalid_EC2_INSTANCES_TIME_BEFORE_TERMINATION( ) -def test_EC2_INSTANCES_PRE_PULL_IMAGES( - app_environment: EnvVarsDict, monkeypatch: pytest.MonkeyPatch +def test_EC2_INSTANCES_PRE_PULL_IMAGES( # noqa: N802 + app_environment: EnvVarsDict, monkeypatch: pytest.MonkeyPatch, faker: Faker ): settings = ApplicationSettings.create_from_envs() assert settings.AUTOSCALING_EC2_INSTANCES - assert not settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_PRE_PULL_IMAGES # passing an invalid image tag name will fail - monkeypatch.setenv( - "EC2_INSTANCES_PRE_PULL_IMAGES", json.dumps(["io.simcore.some234.cool-"]) + setenvs_from_dict( + monkeypatch, + { + "EC2_INSTANCES_ALLOWED_TYPES": json.dumps( + { + "t2.micro": { + "ami_id": faker.pystr(), + "pre_pull_images": ["io.simcore.some234.cool-"], + } + } + ) + }, ) - settings = ApplicationSettings.create_from_envs() - assert not settings.AUTOSCALING_EC2_INSTANCES + with pytest.raises(ValidationError): + ApplicationSettings.create_from_envs() # passing a valid will pass - monkeypatch.setenv( - "EC2_INSTANCES_PRE_PULL_IMAGES", - json.dumps( - [ - "nginx:latest", - "itisfoundation/my-very-nice-service:latest", - "simcore/services/dynamic/another-nice-one:2.4.5", - "asd", - ] - ), + setenvs_from_dict( + monkeypatch, + { + "EC2_INSTANCES_ALLOWED_TYPES": json.dumps( + { + "t2.micro": { + "ami_id": faker.pystr(), + "pre_pull_images": [ + "nginx:latest", + "itisfoundation/my-very-nice-service:latest", + "simcore/services/dynamic/another-nice-one:2.4.5", + "asd", + ], + } + } + ), + }, ) settings = ApplicationSettings.create_from_envs() assert settings.AUTOSCALING_EC2_INSTANCES @@ -102,4 +159,6 @@ def test_EC2_INSTANCES_PRE_PULL_IMAGES( "itisfoundation/my-very-nice-service:latest", "simcore/services/dynamic/another-nice-one:2.4.5", "asd", - ] == settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_PRE_PULL_IMAGES + ] == next( + iter(settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_ALLOWED_TYPES.values()) + ).pre_pull_images diff --git a/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py b/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py index c81c4968bb7..84a82229f08 100644 --- a/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py +++ b/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py @@ -262,9 +262,9 @@ async def test_cluster_scaling_from_labelled_services_with_no_services_and_machi ec2_client, num_reservations=1, num_instances=mock_machines_buffer, - instance_type=app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_ALLOWED_TYPES[ - 0 - ], + instance_type=next( + iter(app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_ALLOWED_TYPES) + ), instance_state="running", ) _assert_rabbit_autoscaling_message_sent( @@ -282,9 +282,9 @@ async def test_cluster_scaling_from_labelled_services_with_no_services_and_machi ec2_client, num_reservations=1, num_instances=mock_machines_buffer, - instance_type=app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_ALLOWED_TYPES[ - 0 - ], + instance_type=next( + iter(app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_ALLOWED_TYPES) + ), instance_state="running", ) assert fake_node.Description @@ -315,9 +315,9 @@ async def test_cluster_scaling_from_labelled_services_with_no_services_and_machi ec2_client, num_reservations=1, num_instances=mock_machines_buffer, - instance_type=app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_ALLOWED_TYPES[ - 0 - ], + instance_type=next( + iter(app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_ALLOWED_TYPES) + ), instance_state="running", ) diff --git a/services/autoscaling/tests/unit/test_modules_auto_scaling_task.py b/services/autoscaling/tests/unit/test_modules_auto_scaling_task.py index 7f3663ef7dc..8395792d052 100644 --- a/services/autoscaling/tests/unit/test_modules_auto_scaling_task.py +++ b/services/autoscaling/tests/unit/test_modules_auto_scaling_task.py @@ -32,11 +32,10 @@ def app_environment( @pytest.fixture def mock_background_task(mocker: MockerFixture) -> mock.Mock: - mocked_task = mocker.patch( + return mocker.patch( "simcore_service_autoscaling.modules.auto_scaling_task.auto_scale_cluster", autospec=True, ) - return mocked_task async def test_auto_scaling_task_not_created_if_no_mode_defined( diff --git a/services/autoscaling/tests/unit/test_modules_rabbitmq.py b/services/autoscaling/tests/unit/test_modules_rabbitmq.py index 13cb1f73ee9..f1cdf140174 100644 --- a/services/autoscaling/tests/unit/test_modules_rabbitmq.py +++ b/services/autoscaling/tests/unit/test_modules_rabbitmq.py @@ -3,7 +3,8 @@ # pylint:disable=redefined-outer-name import asyncio -from typing import Any, Callable, Mapping +from collections.abc import Callable, Mapping +from typing import Any import aiodocker import pytest @@ -28,12 +29,12 @@ from tenacity.stop import stop_after_delay from tenacity.wait import wait_fixed -_TENACITY_RETRY_PARAMS = dict( - reraise=True, - retry=retry_if_exception_type(AssertionError), - stop=stop_after_delay(30), - wait=wait_fixed(0.1), -) +_TENACITY_RETRY_PARAMS = { + "reraise": True, + "retry": retry_if_exception_type(AssertionError), + "stop": stop_after_delay(30), + "wait": wait_fixed(0.1), +} # Selection of core and tool services started in this swarm fixture (integration) pytest_simcore_core_services_selection = [ @@ -86,7 +87,7 @@ def test_rabbitmq_does_not_initialize_if_deactivated( initialized_app: FastAPI, ): assert hasattr(initialized_app.state, "rabbitmq_client") - assert initialized_app.state.rabbitmq_client == None + assert initialized_app.state.rabbitmq_client is None with pytest.raises(ConfigurationError): get_rabbitmq_client(initialized_app) diff --git a/services/autoscaling/tests/unit/test_utils_auto_scaling_core.py b/services/autoscaling/tests/unit/test_utils_auto_scaling_core.py index 345a757f575..48ba0286cfb 100644 --- a/services/autoscaling/tests/unit/test_utils_auto_scaling_core.py +++ b/services/autoscaling/tests/unit/test_utils_auto_scaling_core.py @@ -4,6 +4,7 @@ # pylint: disable=unused-variable # pylint: disable=too-many-arguments +import datetime import json import re from collections.abc import Callable @@ -13,6 +14,8 @@ from models_library.docker import DockerGenericTag from models_library.generated_models.docker_rest_api import Node from pydantic import parse_obj_as +from pytest_simcore.helpers.typing_env import EnvVarsDict +from pytest_simcore.helpers.utils_envs import setenvs_from_dict from simcore_service_autoscaling.core.errors import Ec2InvalidDnsNameError from simcore_service_autoscaling.core.settings import ApplicationSettings from simcore_service_autoscaling.models import EC2InstanceData @@ -135,10 +138,37 @@ def minimal_configuration( ... -async def test_ec2_startup_script_no_pre_pulling( - minimal_configuration: None, app_settings: ApplicationSettings +@pytest.fixture +def ec2_instances_boot_just_ami( + app_environment: EnvVarsDict, monkeypatch: pytest.MonkeyPatch, faker: Faker +) -> EnvVarsDict: + envs = setenvs_from_dict( + monkeypatch, + { + "EC2_INSTANCES_ALLOWED_TYPES": json.dumps( + {"t2.micro": {"ami_id": faker.pystr()}} + ), + }, + ) + return app_environment | envs + + +async def test_ec2_startup_script_just_ami( + minimal_configuration: None, + ec2_instances_boot_just_ami: EnvVarsDict, + app_settings: ApplicationSettings, ): - startup_script = await ec2_startup_script(app_settings) + assert app_settings.AUTOSCALING_EC2_INSTANCES + instance_boot_specific = next( + iter( + app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_ALLOWED_TYPES.values() + ) + ) + assert not instance_boot_specific.pre_pull_images + assert instance_boot_specific.pre_pull_images_cron_interval == datetime.timedelta( + minutes=30 + ) + startup_script = await ec2_startup_script(instance_boot_specific, app_settings) assert len(startup_script.split("&&")) == 1 assert re.fullmatch( r"^docker swarm join --availability=drain --token .*$", startup_script @@ -146,9 +176,30 @@ async def test_ec2_startup_script_no_pre_pulling( @pytest.fixture -def enabled_pre_pull_images( - minimal_configuration: None, monkeypatch: pytest.MonkeyPatch -) -> list[DockerGenericTag]: +def ec2_instances_boot_ami_scripts( + app_environment: EnvVarsDict, monkeypatch: pytest.MonkeyPatch, faker: Faker +) -> list[str]: + custom_scripts = faker.pylist(allowed_types=(str,)) + setenvs_from_dict( + monkeypatch, + { + "EC2_INSTANCES_ALLOWED_TYPES": json.dumps( + { + "t2.micro": { + "ami_id": faker.pystr(), + "custom_boot_scripts": custom_scripts, + } + } + ), + }, + ) + return custom_scripts + + +@pytest.fixture +def ec2_instances_boot_ami_pre_pull( + app_environment: EnvVarsDict, monkeypatch: pytest.MonkeyPatch, faker: Faker +) -> EnvVarsDict: images = parse_obj_as( list[DockerGenericTag], [ @@ -158,23 +209,15 @@ def enabled_pre_pull_images( "asd", ], ) - monkeypatch.setenv( - "EC2_INSTANCES_PRE_PULL_IMAGES", - json.dumps(images), - ) - return images - - -@pytest.fixture -def enabled_custom_boot_scripts( - minimal_configuration: None, monkeypatch: pytest.MonkeyPatch, faker: Faker -) -> list[str]: - custom_scripts = faker.pylist(allowed_types=(str,)) - monkeypatch.setenv( - "EC2_INSTANCES_CUSTOM_BOOT_SCRIPTS", - json.dumps(custom_scripts), + envs = setenvs_from_dict( + monkeypatch, + { + "EC2_INSTANCES_ALLOWED_TYPES": json.dumps( + {"t2.micro": {"ami_id": faker.pystr(), "pre_pull_images": images}} + ), + }, ) - return custom_scripts + return app_environment | envs @pytest.fixture @@ -184,10 +227,18 @@ def disabled_registry(monkeypatch: pytest.MonkeyPatch) -> None: async def test_ec2_startup_script_with_pre_pulling( minimal_configuration: None, - enabled_pre_pull_images: None, + ec2_instances_boot_ami_pre_pull: EnvVarsDict, app_settings: ApplicationSettings, ): - startup_script = await ec2_startup_script(app_settings) + assert app_settings.AUTOSCALING_EC2_INSTANCES + instance_boot_specific = next( + iter( + app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_ALLOWED_TYPES.values() + ) + ) + assert instance_boot_specific.pre_pull_images + assert instance_boot_specific.pre_pull_images_cron_interval + startup_script = await ec2_startup_script(instance_boot_specific, app_settings) assert len(startup_script.split("&&")) == 7 assert re.fullmatch( r"^(docker swarm join [^&&]+) && (echo [^\s]+ \| docker login [^&&]+) && (echo [^&&]+) && (echo [^&&]+) && (chmod \+x [^&&]+) && (./docker-pull-script.sh) && (echo .+)$", @@ -197,26 +248,43 @@ async def test_ec2_startup_script_with_pre_pulling( async def test_ec2_startup_script_with_custom_scripts( minimal_configuration: None, - enabled_pre_pull_images: None, - enabled_custom_boot_scripts: list[str], + ec2_instances_boot_ami_scripts: list[str], app_settings: ApplicationSettings, ): for _ in range(3): - startup_script = await ec2_startup_script(app_settings) - assert len(startup_script.split("&&")) == 7 + len(enabled_custom_boot_scripts) + assert app_settings.AUTOSCALING_EC2_INSTANCES + instance_boot_specific = next( + iter( + app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_ALLOWED_TYPES.values() + ) + ) + assert not instance_boot_specific.pre_pull_images + assert instance_boot_specific.pre_pull_images_cron_interval + startup_script = await ec2_startup_script(instance_boot_specific, app_settings) + assert len(startup_script.split("&&")) == 1 + len( + ec2_instances_boot_ami_scripts + ) assert re.fullmatch( - rf"^([^&&]+ &&){{{len(enabled_custom_boot_scripts)}}} (docker swarm join [^&&]+) && (echo [^\s]+ \| docker login [^&&]+) && (echo [^&&]+) && (echo [^&&]+) && (chmod \+x [^&&]+) && (./docker-pull-script.sh) && (echo .+)$", + rf"^([^&&]+ &&){{{len(ec2_instances_boot_ami_scripts)}}} (docker swarm join .+)$", startup_script, ), f"{startup_script=}" async def test_ec2_startup_script_with_pre_pulling_but_no_registry( minimal_configuration: None, - enabled_pre_pull_images: None, + ec2_instances_boot_ami_pre_pull: EnvVarsDict, disabled_registry: None, app_settings: ApplicationSettings, ): - startup_script = await ec2_startup_script(app_settings) + assert app_settings.AUTOSCALING_EC2_INSTANCES + instance_boot_specific = next( + iter( + app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_ALLOWED_TYPES.values() + ) + ) + assert instance_boot_specific.pre_pull_images + assert instance_boot_specific.pre_pull_images_cron_interval + startup_script = await ec2_startup_script(instance_boot_specific, app_settings) assert len(startup_script.split("&&")) == 1 assert re.fullmatch( r"^docker swarm join --availability=drain --token .*$", startup_script diff --git a/services/autoscaling/tests/unit/test_utils_docker.py b/services/autoscaling/tests/unit/test_utils_docker.py index e9ee5c21c27..73d8719cb18 100644 --- a/services/autoscaling/tests/unit/test_utils_docker.py +++ b/services/autoscaling/tests/unit/test_utils_docker.py @@ -605,7 +605,7 @@ async def test_compute_node_used_resources_with_service( faker: Faker, ): # 1. if we have services with no defined reservations, then we cannot know what they use... - service_with_no_resources = await create_service(task_template, {}, "running") + await create_service(task_template, {}, "running") node_used_resources = await compute_node_used_resources( autoscaling_docker, host_node ) @@ -638,7 +638,7 @@ async def test_compute_node_used_resources_with_service( node_used_resources = await compute_node_used_resources( autoscaling_docker, host_node, - service_labels=[random.choice(list(service_labels.keys()))], + service_labels=[random.choice(list(service_labels.keys()))], # noqa: S311 ) assert node_used_resources == Resources(cpus=host_cpu_count, ram=ByteSize(0)) # 4. if we look for services with all the correct labels, they should then become visible again diff --git a/services/autoscaling/tests/unit/test_utils_rabbitmq.py b/services/autoscaling/tests/unit/test_utils_rabbitmq.py index e5350a393b0..3ec04e7a1db 100644 --- a/services/autoscaling/tests/unit/test_utils_rabbitmq.py +++ b/services/autoscaling/tests/unit/test_utils_rabbitmq.py @@ -4,7 +4,8 @@ # pylint:disable=too-many-arguments -from typing import Any, Awaitable, Callable +from collections.abc import Awaitable, Callable +from typing import Any import aiodocker from faker import Faker @@ -29,12 +30,12 @@ from tenacity.stop import stop_after_delay from tenacity.wait import wait_fixed -_TENACITY_RETRY_PARAMS = dict( - reraise=True, - retry=retry_if_exception_type(AssertionError), - stop=stop_after_delay(30), - wait=wait_fixed(0.1), -) +_TENACITY_RETRY_PARAMS = { + "reraise": True, + "retry": retry_if_exception_type(AssertionError), + "stop": stop_after_delay(30), + "wait": wait_fixed(0.1), +} # Selection of core and tool services started in this swarm fixture (integration) diff --git a/services/clusters-keeper/src/simcore_service_clusters_keeper/core/settings.py b/services/clusters-keeper/src/simcore_service_clusters_keeper/core/settings.py index 491cdeec854..3702227670b 100644 --- a/services/clusters-keeper/src/simcore_service_clusters_keeper/core/settings.py +++ b/services/clusters-keeper/src/simcore_service_clusters_keeper/core/settings.py @@ -2,6 +2,7 @@ from functools import cached_property from typing import Any, ClassVar, Final, cast +from aws_library.ec2.models import EC2InstanceBootSpecific from fastapi import FastAPI from models_library.basic_types import ( BootModeEnum, @@ -31,7 +32,7 @@ class Config(EC2Settings.Config): "examples": [ { f"{CLUSTERS_KEEPER_ENV_PREFIX}EC2_ACCESS_KEY_ID": "my_access_key_id", - f"{CLUSTERS_KEEPER_ENV_PREFIX}EC2_ENDPOINT": "http://my_ec2_endpoint.com", + f"{CLUSTERS_KEEPER_ENV_PREFIX}EC2_ENDPOINT": "https://my_ec2_endpoint.com", f"{CLUSTERS_KEEPER_ENV_PREFIX}EC2_REGION_NAME": "us-east-1", f"{CLUSTERS_KEEPER_ENV_PREFIX}EC2_SECRET_ACCESS_KEY": "my_secret_access_key", } @@ -40,21 +41,29 @@ class Config(EC2Settings.Config): class WorkersEC2InstancesSettings(BaseCustomSettings): - WORKERS_EC2_INSTANCES_ALLOWED_TYPES: list[str] = Field( + WORKERS_EC2_INSTANCES_ALLOWED_TYPES: dict[str, EC2InstanceBootSpecific] = Field( ..., - min_items=1, - unique_items=True, - description="Defines which EC2 instances are considered as candidates for new EC2 instance", + description="Defines which EC2 instances are considered as candidates for new EC2 instance and their respective boot specific parameters", ) - WORKERS_EC2_INSTANCES_AMI_ID: str = Field( + + WORKERS_EC2_INSTANCES_KEY_NAME: str = Field( ..., min_length=1, - description="Defines the AMI (Amazon Machine Image) ID used to start a new EC2 instance", + description="SSH key filename (without ext) to access the instance through SSH" + " (https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-key-pairs.html)," + "this is required to start a new EC2 instance", + ) + # BUFFER is not exposed since we set it to 0 + WORKERS_EC2_INSTANCES_MAX_START_TIME: datetime.timedelta = Field( + default=datetime.timedelta(minutes=3), + description="Usual time taken an EC2 instance with the given AMI takes to be in 'running' mode " + "(default to seconds, or see https://pydantic-docs.helpmanual.io/usage/types/#datetime-types for string formating)", ) WORKERS_EC2_INSTANCES_MAX_INSTANCES: int = Field( default=10, description="Defines the maximum number of instances the clusters_keeper app may create", ) + # NAME PREFIX is not exposed since we override it anyway WORKERS_EC2_INSTANCES_SECURITY_GROUP_IDS: list[str] = Field( ..., min_items=1, @@ -69,13 +78,6 @@ class WorkersEC2InstancesSettings(BaseCustomSettings): " (https://docs.aws.amazon.com/vpc/latest/userguide/configure-subnets.html), " "this is required to start a new EC2 instance", ) - WORKERS_EC2_INSTANCES_KEY_NAME: str = Field( - ..., - min_length=1, - description="SSH key filename (without ext) to access the instance through SSH" - " (https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-key-pairs.html)," - "this is required to start a new EC2 instance", - ) WORKERS_EC2_INSTANCES_TIME_BEFORE_TERMINATION: datetime.timedelta = Field( default=datetime.timedelta(minutes=3), @@ -83,24 +85,18 @@ class WorkersEC2InstancesSettings(BaseCustomSettings): "(default to seconds, or see https://pydantic-docs.helpmanual.io/usage/types/#datetime-types for string formating)", ) - WORKERS_EC2_INSTANCES_MAX_START_TIME: datetime.timedelta = Field( - default=datetime.timedelta(minutes=3), - description="Usual time taken an EC2 instance with the given AMI takes to be in 'running' mode " - "(default to seconds, or see https://pydantic-docs.helpmanual.io/usage/types/#datetime-types for string formating)", - ) - - WORKERS_EC2_INSTANCES_CUSTOM_BOOT_SCRIPTS: list[str] = Field( - default_factory=list, - description="script(s) to run on EC2 instance startup (be careful!), each entry is run one after the other using '&&' operator", - ) - @validator("WORKERS_EC2_INSTANCES_ALLOWED_TYPES") @classmethod - def check_valid_intance_names(cls, value): + def check_valid_instance_names( + cls, value: dict[str, EC2InstanceBootSpecific] + ) -> dict[str, EC2InstanceBootSpecific]: # NOTE: needed because of a flaw in BaseCustomSettings # issubclass raises TypeError if used on Aliases - parse_obj_as(tuple[InstanceTypeType, ...], value) - return value + if all(parse_obj_as(InstanceTypeType, key) for key in value): + return value + + msg = "Invalid instance type name" + raise ValueError(msg) class PrimaryEC2InstancesSettings(BaseCustomSettings): diff --git a/services/clusters-keeper/src/simcore_service_clusters_keeper/data/docker-compose.yml b/services/clusters-keeper/src/simcore_service_clusters_keeper/data/docker-compose.yml index 213aac41821..bb4e22973ee 100644 --- a/services/clusters-keeper/src/simcore_service_clusters_keeper/data/docker-compose.yml +++ b/services/clusters-keeper/src/simcore_service_clusters_keeper/data/docker-compose.yml @@ -47,20 +47,20 @@ services: hostname: "{{.Node.Hostname}}-{{.Service.Name}}-{{.Task.Slot}}" environment: DASK_MONITORING_URL: tcp://dask-scheduler:8786 - EC2_ACCESS_KEY_ID: ${CLUSTERS_KEEPER_EC2_ACCESS_KEY_ID} - EC2_ENDPOINT: ${CLUSTERS_KEEPER_EC2_ENDPOINT} + AUTOSCALING_EC2_ACCESS_KEY_ID: ${CLUSTERS_KEEPER_EC2_ACCESS_KEY_ID} + AUTOSCALING_EC2_ENDPOINT: ${CLUSTERS_KEEPER_EC2_ENDPOINT} + AUTOSCALING_EC2_REGION_NAME: ${CLUSTERS_KEEPER_EC2_REGION_NAME} + AUTOSCALING_EC2_SECRET_ACCESS_KEY: ${CLUSTERS_KEEPER_EC2_SECRET_ACCESS_KEY} + AUTOSCALING_NODES_MONITORING: null + AUTOSCALING_POLL_INTERVAL: 10 EC2_INSTANCES_ALLOWED_TYPES: ${WORKERS_EC2_INSTANCES_ALLOWED_TYPES} - EC2_INSTANCES_AMI_ID: ${WORKERS_EC2_INSTANCES_AMI_ID} - EC2_INSTANCES_CUSTOM_BOOT_SCRIPTS: ${WORKERS_EC2_INSTANCES_CUSTOM_BOOT_SCRIPTS} EC2_INSTANCES_KEY_NAME: ${WORKERS_EC2_INSTANCES_KEY_NAME} + EC2_INSTANCES_MACHINES_BUFFER: 0 EC2_INSTANCES_MAX_INSTANCES: ${WORKERS_EC2_INSTANCES_MAX_INSTANCES} - EC2_INSTANCES_MAX_START_TIME: ${WORKERS_EC2_INSTANCES_MAX_START_TIME} EC2_INSTANCES_NAME_PREFIX: ${EC2_INSTANCES_NAME_PREFIX} EC2_INSTANCES_SECURITY_GROUP_IDS: ${WORKERS_EC2_INSTANCES_SECURITY_GROUP_IDS} EC2_INSTANCES_SUBNET_ID: ${WORKERS_EC2_INSTANCES_SUBNET_ID} EC2_INSTANCES_TIME_BEFORE_TERMINATION: ${WORKERS_EC2_INSTANCES_TIME_BEFORE_TERMINATION} - EC2_REGION_NAME: ${CLUSTERS_KEEPER_EC2_REGION_NAME} - EC2_SECRET_ACCESS_KEY: ${CLUSTERS_KEEPER_EC2_SECRET_ACCESS_KEY} LOG_FORMAT_LOCAL_DEV_ENABLED: 1 LOG_LEVEL: ${LOG_LEVEL:-WARNING} REDIS_HOST: redis diff --git a/services/clusters-keeper/src/simcore_service_clusters_keeper/utils/clusters.py b/services/clusters-keeper/src/simcore_service_clusters_keeper/utils/clusters.py index 805d793a919..5bc8a44aa17 100644 --- a/services/clusters-keeper/src/simcore_service_clusters_keeper/utils/clusters.py +++ b/services/clusters-keeper/src/simcore_service_clusters_keeper/utils/clusters.py @@ -1,9 +1,11 @@ import base64 import datetime import functools +import json from typing import Any, Final from aws_library.ec2.models import EC2InstanceData +from fastapi.encoders import jsonable_encoder from models_library.api_schemas_clusters_keeper.clusters import ( ClusterState, OnDemandCluster, @@ -38,22 +40,22 @@ def _convert_to_env_list(entries: list[Any]) -> str: entries_as_str = ",".join(rf"\"{k}\"" for k in entries) return f"[{entries_as_str}]" + def _convert_to_env_dict(entries: dict[str, Any]) -> str: + return f"'{json.dumps(jsonable_encoder(entries))}'" + environment_variables = [ f"DOCKER_IMAGE_TAG={app_settings.CLUSTERS_KEEPER_COMPUTATIONAL_BACKEND_DOCKER_IMAGE_TAG}", f"CLUSTERS_KEEPER_EC2_ACCESS_KEY_ID={app_settings.CLUSTERS_KEEPER_EC2_ACCESS.EC2_ACCESS_KEY_ID}", f"CLUSTERS_KEEPER_EC2_ENDPOINT={app_settings.CLUSTERS_KEEPER_EC2_ACCESS.EC2_ENDPOINT}", - f"WORKERS_EC2_INSTANCES_ALLOWED_TYPES={_convert_to_env_list(app_settings.CLUSTERS_KEEPER_WORKERS_EC2_INSTANCES.WORKERS_EC2_INSTANCES_ALLOWED_TYPES)}", - f"WORKERS_EC2_INSTANCES_AMI_ID={app_settings.CLUSTERS_KEEPER_WORKERS_EC2_INSTANCES.WORKERS_EC2_INSTANCES_AMI_ID}", - f"WORKERS_EC2_INSTANCES_CUSTOM_BOOT_SCRIPTS={_convert_to_env_list(app_settings.CLUSTERS_KEEPER_WORKERS_EC2_INSTANCES.WORKERS_EC2_INSTANCES_CUSTOM_BOOT_SCRIPTS)}", + f"CLUSTERS_KEEPER_EC2_REGION_NAME={app_settings.CLUSTERS_KEEPER_EC2_ACCESS.EC2_REGION_NAME}", + f"CLUSTERS_KEEPER_EC2_SECRET_ACCESS_KEY={app_settings.CLUSTERS_KEEPER_EC2_ACCESS.EC2_SECRET_ACCESS_KEY}", + f"WORKERS_EC2_INSTANCES_ALLOWED_TYPES={_convert_to_env_dict(app_settings.CLUSTERS_KEEPER_WORKERS_EC2_INSTANCES.WORKERS_EC2_INSTANCES_ALLOWED_TYPES)}", f"WORKERS_EC2_INSTANCES_KEY_NAME={app_settings.CLUSTERS_KEEPER_WORKERS_EC2_INSTANCES.WORKERS_EC2_INSTANCES_KEY_NAME}", f"WORKERS_EC2_INSTANCES_MAX_INSTANCES={app_settings.CLUSTERS_KEEPER_WORKERS_EC2_INSTANCES.WORKERS_EC2_INSTANCES_MAX_INSTANCES}", - f"WORKERS_EC2_INSTANCES_MAX_START_TIME={app_settings.CLUSTERS_KEEPER_WORKERS_EC2_INSTANCES.WORKERS_EC2_INSTANCES_MAX_START_TIME}", f"EC2_INSTANCES_NAME_PREFIX={cluster_machines_name_prefix}", f"WORKERS_EC2_INSTANCES_SECURITY_GROUP_IDS={_convert_to_env_list(app_settings.CLUSTERS_KEEPER_WORKERS_EC2_INSTANCES.WORKERS_EC2_INSTANCES_SECURITY_GROUP_IDS)}", f"WORKERS_EC2_INSTANCES_SUBNET_ID={app_settings.CLUSTERS_KEEPER_WORKERS_EC2_INSTANCES.WORKERS_EC2_INSTANCES_SUBNET_ID}", f"WORKERS_EC2_INSTANCES_TIME_BEFORE_TERMINATION={app_settings.CLUSTERS_KEEPER_WORKERS_EC2_INSTANCES.WORKERS_EC2_INSTANCES_TIME_BEFORE_TERMINATION}", - f"CLUSTERS_KEEPER_EC2_REGION_NAME={app_settings.CLUSTERS_KEEPER_EC2_ACCESS.EC2_REGION_NAME}", - f"CLUSTERS_KEEPER_EC2_SECRET_ACCESS_KEY={app_settings.CLUSTERS_KEEPER_EC2_ACCESS.EC2_SECRET_ACCESS_KEY}", f"LOG_LEVEL={app_settings.LOG_LEVEL}", ] diff --git a/services/clusters-keeper/tests/manual/README.md b/services/clusters-keeper/tests/manual/README.md index a2ecd57c8a0..21f203c86c4 100644 --- a/services/clusters-keeper/tests/manual/README.md +++ b/services/clusters-keeper/tests/manual/README.md @@ -63,7 +63,7 @@ CLUSTERS_KEEPER_EC2_ACCESS={} CLUSTERS_KEEPER_EC2_ACCESS_KEY_ID=XXXXXXX CLUSTERS_KEEPER_EC2_ENDPOINT=https://ec2.amazonaws.com CLUSTERS_KEEPER_EC2_REGION_NAME=us-east-1 -CLUSTERS_KEEPER_SECRET_EC2_ACCESS_KEY=XXXXXXX +CLUSTERS_KEEPER_EC2_SECRET_ACCESS_KEY=XXXXXXX CLUSTERS_KEEPER_PRIMARY_EC2_INSTANCES={} PRIMARY_EC2_INSTANCES_ALLOWED_TYPES="[\"t2.micro\"]" @@ -74,13 +74,12 @@ PRIMARY_EC2_INSTANCES_SECURITY_GROUP_IDS="[\"XXXXXXX\"]" PRIMARY_EC2_INSTANCES_SUBNET_ID=XXXXXXX CLUSTERS_KEEPER_WORKERS_EC2_INSTANCES={} -WORKERS_EC2_INSTANCES_ALLOWED_TYPES="[\"g4dn.xlarge\"]" # will change in some next PR -WORKERS_EC2_INSTANCES_AMI_ID=XXXXXXX -WORKERS_EC2_INSTANCES_CUSTOM_BOOT_SCRIPTS="[]" +WORKERS_EC2_INSTANCES_ALLOWED_TYPES='{"g4dn.xlarge": {"ami_id": "XXXXXXXX", "custom_boot_scripts": ["whoami"], "pre_pull_images": ["ubuntu:latest"]}}' WORKERS_EC2_INSTANCES_KEY_NAME=XXXXXXX WORKERS_EC2_INSTANCES_MAX_INSTANCES=10 WORKERS_EC2_INSTANCES_SECURITY_GROUP_IDS="[\"XXXXXXX\"]" WORKERS_EC2_INSTANCES_SUBNET_ID=XXXXXXX +WORKERS_EC2_INSTANCES_TIME_BEFORE_TERMINATION="00:03:00" ``` 5. start osparc diff --git a/services/clusters-keeper/tests/unit/conftest.py b/services/clusters-keeper/tests/unit/conftest.py index 26c97e27084..82803bc03da 100644 --- a/services/clusters-keeper/tests/unit/conftest.py +++ b/services/clusters-keeper/tests/unit/conftest.py @@ -4,6 +4,7 @@ import importlib.resources import json +import random from collections.abc import AsyncIterator, Awaitable, Callable, Iterator from pathlib import Path from typing import Any @@ -15,7 +16,7 @@ import simcore_service_clusters_keeper.data import yaml from asgi_lifespan import LifespanManager -from aws_library.ec2.client import SimcoreEC2API +from aws_library.ec2.models import EC2InstanceBootSpecific from faker import Faker from fakeredis.aioredis import FakeRedis from fastapi import FastAPI @@ -106,8 +107,14 @@ def app_environment( "PRIMARY_EC2_INSTANCES_AMI_ID": faker.pystr(), "PRIMARY_EC2_INSTANCES_ALLOWED_TYPES": json.dumps(ec2_instances), "CLUSTERS_KEEPER_WORKERS_EC2_INSTANCES": "{}", - "WORKERS_EC2_INSTANCES_ALLOWED_TYPES": json.dumps(ec2_instances), - "WORKERS_EC2_INSTANCES_AMI_ID": faker.pystr(), + "WORKERS_EC2_INSTANCES_ALLOWED_TYPES": json.dumps( + { + ec2_type_name: random.choice( # noqa: S311 + EC2InstanceBootSpecific.Config.schema_extra["examples"] + ) + for ec2_type_name in ec2_instances + } + ), "WORKERS_EC2_INSTANCES_SECURITY_GROUP_IDS": json.dumps( faker.pylist(allowed_types=(str,)) ), @@ -162,10 +169,7 @@ def disable_clusters_management_background_task( @pytest.fixture def disabled_rabbitmq(app_environment: EnvVarsDict, monkeypatch: pytest.MonkeyPatch): - monkeypatch.delenv("RABBIT_HOST") - monkeypatch.delenv("RABBIT_USER") - monkeypatch.delenv("RABBIT_SECURE") - monkeypatch.delenv("RABBIT_PASSWORD") + monkeypatch.setenv("CLUSTERS_KEEPER_RABBITMQ", "null") @pytest.fixture @@ -204,36 +208,6 @@ async def async_client(initialized_app: FastAPI) -> AsyncIterator[httpx.AsyncCli yield client -@pytest.fixture -def aws_allowed_ec2_instance_type_names_env( - app_environment: EnvVarsDict, - monkeypatch: pytest.MonkeyPatch, -) -> EnvVarsDict: - changed_envs = { - "PRIMARY_EC2_INSTANCES_ALLOWED_TYPES": json.dumps( - [ - "t2.xlarge", - "t2.2xlarge", - "g3.4xlarge", - "r5n.4xlarge", - "r5n.8xlarge", - ] - ), - } - return app_environment | setenvs_from_dict(monkeypatch, changed_envs) - - -@pytest.fixture -async def clusters_keeper_ec2( - app_environment: EnvVarsDict, -) -> AsyncIterator[SimcoreEC2API]: - settings = EC2Settings.create_from_envs() - ec2 = await SimcoreEC2API.create(settings) - assert ec2 - yield ec2 - await ec2.close() - - @pytest.fixture async def mocked_redis_server(mocker: MockerFixture) -> None: mock_redis = FakeRedis() diff --git a/services/clusters-keeper/tests/unit/test_utils_clusters.py b/services/clusters-keeper/tests/unit/test_utils_clusters.py index 3e772896da0..80bae98c0b7 100644 --- a/services/clusters-keeper/tests/unit/test_utils_clusters.py +++ b/services/clusters-keeper/tests/unit/test_utils_clusters.py @@ -49,10 +49,11 @@ def test_create_startup_script( startup_script.splitlines()[-1].split("docker stack deploy")[0].strip() ) assert startup_script_envs_definition - startup_script_env_keys_names = { - entry.split("=", maxsplit=1)[0]: entry.split("=", maxsplit=1)[1] - for entry in startup_script_envs_definition.split(" ") - } + # Use regular expression to split the string into key-value pairs (courtesy of chatGPT) + startup_script_key_value_pairs: list[tuple[str, str]] = re.findall( + r"(\S+)=([\S\s]+?)(?=\S+=|$)", startup_script_envs_definition + ) + startup_script_env_keys_names = [key for key, _ in startup_script_key_value_pairs] # docker-compose expected values assert "services" in clusters_keeper_docker_compose assert "autoscaling" in clusters_keeper_docker_compose["services"] @@ -79,8 +80,6 @@ def test_create_startup_script( # check lists have \" written in them list_settings = [ - "WORKERS_EC2_INSTANCES_ALLOWED_TYPES", - "WORKERS_EC2_INSTANCES_CUSTOM_BOOT_SCRIPTS", "WORKERS_EC2_INSTANCES_SECURITY_GROUP_IDS", ] assert all( diff --git a/services/docker-compose.yml b/services/docker-compose.yml index e8f63bbaaf7..c97297d37d7 100644 --- a/services/docker-compose.yml +++ b/services/docker-compose.yml @@ -35,10 +35,44 @@ services: networks: - autoscaling_subnet environment: - - LOG_LEVEL=${LOG_LEVEL:-WARNING} + - AUTOSCALING_LOGLEVEL=${AUTOSCALING_LOGLEVEL:-${LOG_LEVEL:-WARNING}} + - AUTOSCALING_POLL_INTERVAL=${AUTOSCALING_POLL_INTERVAL} + + - AUTOSCALING_DASK=${AUTOSCALING_DASK} # comp autoscaling + - DASK_MONITORING_URL=${DASK_MONITORING_URL} + + - AUTOSCALING_EC2_ACCESS=${AUTOSCALING_EC2_ACCESS} # used to enable/disable + - AUTOSCALING_EC2_ACCESS_KEY_ID=${AUTOSCALING_EC2_ACCESS_KEY_ID} + - AUTOSCALING_EC2_SECRET_ACCESS_KEY=${AUTOSCALING_EC2_SECRET_ACCESS_KEY} + - AUTOSCALING_EC2_REGION_NAME=${AUTOSCALING_EC2_REGION_NAME} + + - AUTOSCALING_EC2_INSTANCES=${AUTOSCALING_EC2_INSTANCES} # used to enable/disable + - EC2_INSTANCES_ALLOWED_TYPES=${EC2_INSTANCES_ALLOWED_TYPES} + - EC2_INSTANCES_MACHINES_BUFFER=${EC2_INSTANCES_MACHINES_BUFFER} + - EC2_INSTANCES_MAX_INSTANCES=${EC2_INSTANCES_MAX_INSTANCES} + - EC2_INSTANCES_NAME_PREFIX=${EC2_INSTANCES_NAME_PREFIX} + - EC2_INSTANCES_SECURITY_GROUP_IDS=${EC2_INSTANCES_SECURITY_GROUP_IDS} + - EC2_INSTANCES_SUBNET_ID=${EC2_INSTANCES_SUBNET_ID} + - EC2_INSTANCES_KEY_NAME=${EC2_INSTANCES_KEY_NAME} + + - AUTOSCALING_NODES_MONITORING=${AUTOSCALING_NODES_MONITORING} # dyn autoscaling + - NODES_MONITORING_NODE_LABELS=${NODES_MONITORING_NODE_LABELS} + - NODES_MONITORING_SERVICE_LABELS=${NODES_MONITORING_SERVICE_LABELS} + - NODES_MONITORING_NEW_NODES_LABELS=${NODES_MONITORING_NEW_NODES_LABELS} + - LOG_FORMAT_LOCAL_DEV_ENABLED=${LOG_FORMAT_LOCAL_DEV_ENABLED} + - RABBIT_HOST=${RABBIT_HOST} + - RABBIT_PASSWORD=${RABBIT_PASSWORD} + - RABBIT_PORT=${RABBIT_PORT} + - RABBIT_SECURE=${RABBIT_SECURE} + - RABBIT_USER=${RABBIT_USER} - REDIS_HOST=${REDIS_HOST} - REDIS_PORT=${REDIS_PORT} + - REGISTRY_USER=${REGISTRY_USER} + - REGISTRY_PW=${REGISTRY_PW} + - REGISTRY_URL=${REGISTRY_URL} + - REGISTRY_SSL=${REGISTRY_SSL} + - REGISTRY_AUTH=${REGISTRY_AUTH} volumes: - "/var/run/docker.sock:/var/run/docker.sock" deploy: @@ -89,7 +123,7 @@ services: - CLUSTERS_KEEPER_EC2_ACCESS_KEY_ID=${CLUSTERS_KEEPER_EC2_ACCESS_KEY_ID} - CLUSTERS_KEEPER_EC2_ENDPOINT=${CLUSTERS_KEEPER_EC2_ENDPOINT} - CLUSTERS_KEEPER_EC2_REGION_NAME=${CLUSTERS_KEEPER_EC2_REGION_NAME} - - CLUSTERS_KEEPER_EC2_SECRET_ACCESS_KEY=${CLUSTERS_KEEPER_} + - CLUSTERS_KEEPER_EC2_SECRET_ACCESS_KEY=${CLUSTERS_KEEPER_EC2_SECRET_ACCESS_KEY} - LOG_FORMAT_LOCAL_DEV_ENABLED=${LOG_FORMAT_LOCAL_DEV_ENABLED} - CLUSTERS_KEEPER_PRIMARY_EC2_INSTANCES=${CLUSTERS_KEEPER_PRIMARY_EC2_INSTANCES} - PRIMARY_EC2_INSTANCES_ALLOWED_TYPES=${PRIMARY_EC2_INSTANCES_ALLOWED_TYPES} @@ -108,8 +142,6 @@ services: - SWARM_STACK_NAME=${SWARM_STACK_NAME} - CLUSTERS_KEEPER_WORKERS_EC2_INSTANCES=${CLUSTERS_KEEPER_WORKERS_EC2_INSTANCES} - WORKERS_EC2_INSTANCES_ALLOWED_TYPES=${WORKERS_EC2_INSTANCES_ALLOWED_TYPES} - - WORKERS_EC2_INSTANCES_AMI_ID=${WORKERS_EC2_INSTANCES_AMI_ID} - - WORKERS_EC2_INSTANCES_CUSTOM_BOOT_SCRIPTS=${WORKERS_EC2_INSTANCES_CUSTOM_BOOT_SCRIPTS} - WORKERS_EC2_INSTANCES_TIME_BEFORE_TERMINATION=${WORKERS_EC2_INSTANCES_TIME_BEFORE_TERMINATION} - WORKERS_EC2_INSTANCES_KEY_NAME=${WORKERS_EC2_INSTANCES_KEY_NAME} - WORKERS_EC2_INSTANCES_MAX_INSTANCES=${WORKERS_EC2_INSTANCES_MAX_INSTANCES}