From df52587bef40f55ed435e27ecd6cb748203d379a Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Wed, 24 May 2023 16:46:30 +0200 Subject: [PATCH] attach-time tenant config (#4255) This PR adds support for supplying the tenant config upon /attach. Before this change, when relocating a tenant using `/detach` and `/attach`, the tenant config after `/attach` would be the default config from `pageserver.toml`. That is undesirable for settings such as the PITR-interval: if the tenant's config on the source was `30 days` and the default config on the attach-side is `7 days`, then the first GC run would eradicate 23 days worth of PITR capability. The API change is backwards-compatible: if the body is empty, we continue to use the default config. We'll remove that capability as soon as the cloud.git code is updated to use attach-time tenant config (https://github.com/neondatabase/neon/issues/4282 keeps track of this). unblocks https://github.com/neondatabase/cloud/issues/5092 fixes https://github.com/neondatabase/neon/issues/1555 part of https://github.com/neondatabase/neon/issues/886 (Tenant Relocation) Implementation ============== The preliminary PRs for this work were (most-recent to least-recent) * https://github.com/neondatabase/neon/pull/4279 * https://github.com/neondatabase/neon/pull/4267 * https://github.com/neondatabase/neon/pull/4252 * https://github.com/neondatabase/neon/pull/4235 --- libs/pageserver_api/src/models.rs | 34 +++ libs/utils/src/http/json.rs | 18 +- pageserver/src/http/openapi_spec.yml | 27 ++- pageserver/src/http/routes.rs | 16 +- test_runner/fixtures/pageserver/http.py | 23 +- .../regress/test_attach_tenant_config.py | 200 ++++++++++++++++++ 6 files changed, 307 insertions(+), 11 deletions(-) create mode 100644 test_runner/regress/test_attach_tenant_config.py diff --git a/libs/pageserver_api/src/models.rs b/libs/pageserver_api/src/models.rs index 3bfedd14ea5b..3927ba3dadf7 100644 --- a/libs/pageserver_api/src/models.rs +++ b/libs/pageserver_api/src/models.rs @@ -234,6 +234,28 @@ impl TenantConfigRequest { } } +#[derive(Debug, Serialize, Deserialize)] +pub struct TenantAttachRequest { + pub config: TenantAttachConfig, +} + +/// Newtype to enforce deny_unknown_fields on TenantConfig for +/// its usage inside `TenantAttachRequest`. +#[derive(Debug, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct TenantAttachConfig { + #[serde(flatten)] + allowing_unknown_fields: TenantConfig, +} + +impl std::ops::Deref for TenantAttachConfig { + type Target = TenantConfig; + + fn deref(&self) -> &Self::Target { + &self.allowing_unknown_fields + } +} + /// See [`TenantState::attachment_status`] and the OpenAPI docs for context. #[derive(Serialize, Deserialize, Clone)] #[serde(rename_all = "snake_case")] @@ -796,5 +818,17 @@ mod tests { "expect unknown field `unknown_field` error, got: {}", err ); + + let attach_request = json!({ + "config": { + "unknown_field": "unknown_value".to_string(), + }, + }); + let err = serde_json::from_value::(attach_request).unwrap_err(); + assert!( + err.to_string().contains("unknown field `unknown_field`"), + "expect unknown field `unknown_field` error, got: {}", + err + ); } } diff --git a/libs/utils/src/http/json.rs b/libs/utils/src/http/json.rs index 8981fdd1dda6..9c153033cb63 100644 --- a/libs/utils/src/http/json.rs +++ b/libs/utils/src/http/json.rs @@ -8,12 +8,26 @@ use super::error::ApiError; pub async fn json_request Deserialize<'de>>( request: &mut Request, ) -> Result { - let whole_body = hyper::body::aggregate(request.body_mut()) + json_request_or_empty_body(request) + .await? + .context("missing request body") + .map_err(ApiError::BadRequest) +} + +/// Will be removed as part of https://github.com/neondatabase/neon/issues/4282 +pub async fn json_request_or_empty_body Deserialize<'de>>( + request: &mut Request, +) -> Result, ApiError> { + let body = hyper::body::aggregate(request.body_mut()) .await .context("Failed to read request body") .map_err(ApiError::BadRequest)?; - serde_json::from_reader(whole_body.reader()) + if body.remaining() == 0 { + return Ok(None); + } + serde_json::from_reader(body.reader()) .context("Failed to parse json request") + .map(Some) .map_err(ApiError::BadRequest) } diff --git a/pageserver/src/http/openapi_spec.yml b/pageserver/src/http/openapi_spec.yml index 0d0960365086..e23d3f3a207d 100644 --- a/pageserver/src/http/openapi_spec.yml +++ b/pageserver/src/http/openapi_spec.yml @@ -363,11 +363,29 @@ paths: * MUST NOT ASSUME that the request has been lost, based on the observation that a subsequent tenant status request returns 404. The request may still be in flight. It must be retried. + + The client SHOULD supply a `TenantConfig` for the tenant in the request body. + Settings specified in the config override the pageserver's defaults. + It is guaranteed that the config settings are applied before the pageserver + starts operating on the tenant. E.g., if the config specifies a specific + PITR interval for a tenant, then that setting will be in effect before the + pageserver starts the garbage collection loop. This enables a client to + guarantee a specific PITR setting across detach/attach cycles. + The pageserver will reject the request if it cannot parse the config, or + if there are any unknown fields in it. + + If the client does not supply a config, the pageserver will use its defaults. + This behavior is deprecated: https://github.com/neondatabase/neon/issues/4282 + requestBody: + required: false + content: + application/json: + schema: + $ref: "#/components/schemas/TenantAttachRequest" responses: "202": description: Tenant attaching scheduled "400": - description: Error when no tenant id found in path parameters content: application/json: schema: @@ -922,6 +940,13 @@ components: new_tenant_id: type: string format: hex + TenantAttachRequest: + type: object + required: + - config + properties: + config: + $ref: '#/components/schemas/TenantConfig' TenantConfigRequest: allOf: - $ref: '#/components/schemas/TenantConfig' diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index 7d60d3568a8e..83d478ac3d52 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -5,12 +5,13 @@ use anyhow::{anyhow, Context, Result}; use hyper::StatusCode; use hyper::{Body, Request, Response, Uri}; use metrics::launch_timestamp::LaunchTimestamp; -use pageserver_api::models::DownloadRemoteLayersTaskSpawnRequest; +use pageserver_api::models::{DownloadRemoteLayersTaskSpawnRequest, TenantAttachRequest}; use remote_storage::GenericRemoteStorage; use tenant_size_model::{SizeResult, StorageModel}; use tokio_util::sync::CancellationToken; use tracing::*; use utils::http::endpoint::RequestSpan; +use utils::http::json::json_request_or_empty_body; use utils::http::request::{get_request_param, must_get_query_param, parse_query_param}; use super::models::{ @@ -386,11 +387,16 @@ async fn get_lsn_by_timestamp_handler(request: Request) -> Result) -> Result, ApiError> { +async fn tenant_attach_handler(mut request: Request) -> Result, ApiError> { let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?; check_permission(&request, Some(tenant_id))?; + let maybe_body: Option = json_request_or_empty_body(&mut request).await?; + let tenant_conf = match maybe_body { + Some(request) => TenantConfOpt::try_from(&*request.config).map_err(ApiError::BadRequest)?, + None => TenantConfOpt::default(), + }; + let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Warn); info!("Handling tenant attach {tenant_id}"); @@ -401,9 +407,7 @@ async fn tenant_attach_handler(request: Request) -> Result, mgr::attach_tenant( state.conf, tenant_id, - // XXX: Attach should provide the config, especially during tenant migration. - // See https://github.com/neondatabase/neon/issues/1555 - TenantConfOpt::default(), + tenant_conf, remote_storage.clone(), &ctx, ) diff --git a/test_runner/fixtures/pageserver/http.py b/test_runner/fixtures/pageserver/http.py index 1349923cc401..12720478813b 100644 --- a/test_runner/fixtures/pageserver/http.py +++ b/test_runner/fixtures/pageserver/http.py @@ -1,5 +1,6 @@ from __future__ import annotations +import json import time from collections import defaultdict from dataclasses import dataclass @@ -109,6 +110,10 @@ def __init__(self, port: int, is_testing_enabled_or_skip: Fn, auth_token: Option if auth_token is not None: self.headers["Authorization"] = f"Bearer {auth_token}" + @property + def base_url(self) -> str: + return f"http://localhost:{self.port}" + def verbose_error(self, res: requests.Response): try: res.raise_for_status() @@ -168,8 +173,22 @@ def tenant_create( assert isinstance(new_tenant_id, str) return TenantId(new_tenant_id) - def tenant_attach(self, tenant_id: TenantId): - res = self.post(f"http://localhost:{self.port}/v1/tenant/{tenant_id}/attach") + def tenant_attach( + self, tenant_id: TenantId, config: None | Dict[str, Any] = None, config_null: bool = False + ): + if config_null: + assert config is None + body = "null" + else: + # null-config is prohibited by the API + if config is None: + config = {} + body = json.dumps({"config": config}) + res = self.post( + f"http://localhost:{self.port}/v1/tenant/{tenant_id}/attach", + data=body, + headers={"Content-Type": "application/json"}, + ) self.verbose_error(res) def tenant_detach(self, tenant_id: TenantId, detach_ignored=False): diff --git a/test_runner/regress/test_attach_tenant_config.py b/test_runner/regress/test_attach_tenant_config.py new file mode 100644 index 000000000000..eb2ba3e9ed1e --- /dev/null +++ b/test_runner/regress/test_attach_tenant_config.py @@ -0,0 +1,200 @@ +from dataclasses import dataclass +from typing import Generator, Optional + +import pytest +from fixtures.neon_fixtures import ( + LocalFsStorage, + NeonEnv, + NeonEnvBuilder, + RemoteStorageKind, +) +from fixtures.pageserver.http import PageserverApiException, TenantConfig +from fixtures.types import TenantId +from fixtures.utils import wait_until + + +@pytest.fixture +def positive_env(neon_env_builder: NeonEnvBuilder) -> NeonEnv: + neon_env_builder.enable_remote_storage( + remote_storage_kind=RemoteStorageKind.LOCAL_FS, + test_name="test_attach_tenant_config", + ) + env = neon_env_builder.init_start() + assert isinstance(env.remote_storage, LocalFsStorage) + return env + + +@dataclass +class NegativeTests: + neon_env: NeonEnv + tenant_id: TenantId + config_pre_detach: TenantConfig + + +@pytest.fixture +def negative_env(neon_env_builder: NeonEnvBuilder) -> Generator[NegativeTests, None, None]: + neon_env_builder.enable_remote_storage( + remote_storage_kind=RemoteStorageKind.LOCAL_FS, + test_name="test_attach_tenant_config", + ) + env = neon_env_builder.init_start() + assert isinstance(env.remote_storage, LocalFsStorage) + + ps_http = env.pageserver.http_client() + (tenant_id, _) = env.neon_cli.create_tenant() + assert ps_http.tenant_config(tenant_id).tenant_specific_overrides == {} + config_pre_detach = ps_http.tenant_config(tenant_id) + assert tenant_id in [TenantId(t["id"]) for t in ps_http.tenant_list()] + ps_http.tenant_detach(tenant_id) + assert tenant_id not in [TenantId(t["id"]) for t in ps_http.tenant_list()] + + yield NegativeTests(env, tenant_id, config_pre_detach) + + assert tenant_id not in [ + TenantId(t["id"]) for t in ps_http.tenant_list() + ], "tenant should not be attached after negative test" + + env.pageserver.allowed_errors.append(".*Error processing HTTP request: Bad request") + + def log_contains_bad_request(): + env.pageserver.log_contains(".*Error processing HTTP request: Bad request") + + wait_until(50, 0.1, log_contains_bad_request) + + +def test_null_body(negative_env: NegativeTests): + """ + If we send `null` in the body, the request should be rejected with status 400. + """ + env = negative_env.neon_env + tenant_id = negative_env.tenant_id + ps_http = env.pageserver.http_client() + + res = ps_http.post( + f"{ps_http.base_url}/v1/tenant/{tenant_id}/attach", + data=b"null", + headers={"Content-Type": "application/json"}, + ) + assert res.status_code == 400 + + +def test_null_config(negative_env: NegativeTests): + """ + If the `config` field is `null`, the request should be rejected with status 400. + """ + + env = negative_env.neon_env + tenant_id = negative_env.tenant_id + ps_http = env.pageserver.http_client() + + res = ps_http.post( + f"{ps_http.base_url}/v1/tenant/{tenant_id}/attach", + data=b'{"config": null}', + headers={"Content-Type": "application/json"}, + ) + assert res.status_code == 400 + + +def test_config_with_unknown_keys_is_bad_request(negative_env: NegativeTests): + """ + If we send a config with unknown keys, the request should be rejected with status 400. + """ + + env = negative_env.neon_env + tenant_id = negative_env.tenant_id + ps_http = env.pageserver.http_client() + + config_with_unknown_keys = { + "compaction_period": "1h", + "this_key_does_not_exist": "some value", + } + + with pytest.raises(PageserverApiException) as e: + ps_http.tenant_attach(tenant_id, config=config_with_unknown_keys) + assert e.type == PageserverApiException + assert e.value.status_code == 400 + + +@pytest.mark.parametrize("content_type", [None, "application/json"]) +def test_empty_body(positive_env: NeonEnv, content_type: Optional[str]): + """ + For backwards-compatiblity: if we send an empty body, + the request should be accepted and the config should be the default config. + """ + env = positive_env + ps_http = env.pageserver.http_client() + (tenant_id, _) = env.neon_cli.create_tenant() + assert ps_http.tenant_config(tenant_id).tenant_specific_overrides == {} + config_pre_detach = ps_http.tenant_config(tenant_id) + assert tenant_id in [TenantId(t["id"]) for t in ps_http.tenant_list()] + ps_http.tenant_detach(tenant_id) + assert tenant_id not in [TenantId(t["id"]) for t in ps_http.tenant_list()] + + ps_http.post( + f"{ps_http.base_url}/v1/tenant/{tenant_id}/attach", + data=b"", + headers=None if content_type else {"Content-Type": "application/json"}, + ).raise_for_status() + + assert ps_http.tenant_config(tenant_id).tenant_specific_overrides == {} + assert ps_http.tenant_config(tenant_id).effective_config == config_pre_detach.effective_config + + +def test_fully_custom_config(positive_env: NeonEnv): + """ + If we send a valid config in the body, the request should be accepted and the config should be applied. + """ + env = positive_env + + fully_custom_config = { + "compaction_period": "1h", + "compaction_threshold": 13, + "compaction_target_size": 1048576, + "checkpoint_distance": 10000, + "checkpoint_timeout": "13m", + "eviction_policy": { + "kind": "LayerAccessThreshold", + "period": "20s", + "threshold": "23h", + }, + "evictions_low_residence_duration_metric_threshold": "2days", + "gc_horizon": 23 * (1024 * 1024), + "gc_period": "2h 13m", + "image_creation_threshold": 7, + "pitr_interval": "1m", + "lagging_wal_timeout": "23m", + "max_lsn_wal_lag": 230000, + "min_resident_size_override": 23, + "trace_read_requests": True, + "walreceiver_connect_timeout": "13m", + } + + ps_http = env.pageserver.http_client() + + initial_tenant_config = ps_http.tenant_config(env.initial_tenant) + assert initial_tenant_config.tenant_specific_overrides == {} + assert set(initial_tenant_config.effective_config.keys()) == set( + fully_custom_config.keys() + ), "ensure we cover all config options" + + (tenant_id, _) = env.neon_cli.create_tenant() + ps_http.set_tenant_config(tenant_id, fully_custom_config) + our_tenant_config = ps_http.tenant_config(tenant_id) + assert our_tenant_config.tenant_specific_overrides == fully_custom_config + assert set(our_tenant_config.effective_config.keys()) == set( + fully_custom_config.keys() + ), "ensure we cover all config options" + assert { + k: initial_tenant_config.effective_config[k] != our_tenant_config.effective_config[k] + for k in fully_custom_config.keys() + } == { + k: True for k in fully_custom_config.keys() + }, "ensure our custom config has different values than the default config for all config options, so we know we overrode everything" + + ps_http.tenant_detach(tenant_id) + ps_http.tenant_attach(tenant_id, config=fully_custom_config) + + assert ps_http.tenant_config(tenant_id).tenant_specific_overrides == fully_custom_config + assert set(ps_http.tenant_config(tenant_id).effective_config.keys()) == set( + fully_custom_config.keys() + ), "ensure we cover all config options"