From 00b30c23722489d63e0b0fb0217958395a5e5e7a Mon Sep 17 00:00:00 2001 From: Semyon Danilov Date: Wed, 8 Jan 2025 16:09:39 +0100 Subject: [PATCH 1/2] PDisk restart and readonly commands with FQDN and disk's path --- ydb/apps/dstool/lib/commands.py | 6 +- .../dstool/lib/dstool_cmd_pdisk_readonly.py | 58 +++++++++++++++ .../dstool/lib/dstool_cmd_pdisk_restart.py | 55 ++++++++++++++ ydb/apps/dstool/lib/ya.make | 2 + ydb/core/mind/bscontroller/cmds_box.cpp | 74 ++++++++++++------- ydb/core/mind/bscontroller/error.h | 6 ++ ydb/core/protos/blobstorage_config.proto | 6 ++ 7 files changed, 180 insertions(+), 27 deletions(-) create mode 100644 ydb/apps/dstool/lib/dstool_cmd_pdisk_readonly.py create mode 100644 ydb/apps/dstool/lib/dstool_cmd_pdisk_restart.py diff --git a/ydb/apps/dstool/lib/commands.py b/ydb/apps/dstool/lib/commands.py index 568cd7b08a0a..e403b5c613a8 100644 --- a/ydb/apps/dstool/lib/commands.py +++ b/ydb/apps/dstool/lib/commands.py @@ -5,6 +5,8 @@ import ydb.apps.dstool.lib.dstool_cmd_pdisk_set as pdisk_set import ydb.apps.dstool.lib.dstool_cmd_pdisk_list as pdisk_list import ydb.apps.dstool.lib.dstool_cmd_pdisk_stop as pdisk_stop +import ydb.apps.dstool.lib.dstool_cmd_pdisk_restart as pdisk_restart +import ydb.apps.dstool.lib.dstool_cmd_pdisk_readonly as pdisk_readonly import ydb.apps.dstool.lib.dstool_cmd_vdisk_evict as vdisk_evict import ydb.apps.dstool.lib.dstool_cmd_vdisk_list as vdisk_list @@ -49,14 +51,14 @@ pool_list, pool_create_virtual, group_check, group_decommit, group_show_blob_info, group_show_storage_efficiency, group_show_usage_by_tablets, group_state, group_take_snapshot, group_add, group_list, group_virtual_create, group_virtual_cancel, - pdisk_add_by_serial, pdisk_remove_by_serial, pdisk_set, pdisk_list, pdisk_stop, + pdisk_add_by_serial, pdisk_remove_by_serial, pdisk_set, pdisk_list, pdisk_stop, pdisk_restart, pdisk_readonly, vdisk_evict, vdisk_list, vdisk_set_read_only, vdisk_remove_donor, vdisk_wipe, device_list, ] default_structure = [ ('device', ['list']), - ('pdisk', ['add-by-serial', 'remove-by-serial', 'set', 'list', 'stop']), + ('pdisk', ['add-by-serial', 'remove-by-serial', 'set', 'list', 'stop', 'restart', 'readonly']), ('vdisk', ['evict', 'list', 'set-read-only', 'remove-donor', 'wipe']), ('group', ['add', 'check', 'decommit', ('show', ['blob-info', 'storage-efficiency', 'usage-by-tablets']), 'state', 'take-snapshot', 'list', ('virtual', ['create', 'cancel'])]), ('pool', ['list', ('create', ['virtual'])]), diff --git a/ydb/apps/dstool/lib/dstool_cmd_pdisk_readonly.py b/ydb/apps/dstool/lib/dstool_cmd_pdisk_readonly.py new file mode 100644 index 000000000000..0475331f6095 --- /dev/null +++ b/ydb/apps/dstool/lib/dstool_cmd_pdisk_readonly.py @@ -0,0 +1,58 @@ +import ydb.apps.dstool.lib.common as common +import sys + +description = 'Change PDisk read-only status' + + +def add_options(p): + common.add_pdisk_select_options(p) + common.add_ignore_degraded_group_check_option(p) + common.add_ignore_failure_model_group_check_option(p) + common.add_ignore_vslot_quotas_option(p) + common.add_basic_format_options(p) + p.add_argument('--enabled', type=str, choices=['true', 'false'], help='Enable read-only mode') + + +def create_request(args, pdisk): + request = common.create_bsc_request(args) + cmd = request.Command.add().SetPDiskReadOnly + + cmd.TargetPDiskId.NodeId = pdisk[0] + cmd.TargetPDiskId.PDiskId = pdisk[1] + + cmd.Value = args.enabled == 'true' + + return request + + +def perform_request(request): + return common.invoke_bsc_request(request) + + +def is_successful_response(response): + return common.is_successful_bsc_response(response) + + +def do(args): + base_config = common.fetch_base_config() + + assert not args.dry_run, '--dry-run is not supported for this command' + + pdisks = common.get_selected_pdisks(args, base_config) + + if len(pdisks) != 1: + common.print_status(args, success=False, error_reason='Only change one PDisk read-only status at a time') + sys.exit(1) + + success = True + error_reason = '' + + request = create_request(args, list(pdisks)[0]) + response = perform_request(request) + if not is_successful_response(response): + success = False + error_reason += 'Request has failed: \n{0}\n{1}\n'.format(request, response) + + common.print_status(args, success, error_reason) + if not success: + sys.exit(1) diff --git a/ydb/apps/dstool/lib/dstool_cmd_pdisk_restart.py b/ydb/apps/dstool/lib/dstool_cmd_pdisk_restart.py new file mode 100644 index 000000000000..f682b6e84cac --- /dev/null +++ b/ydb/apps/dstool/lib/dstool_cmd_pdisk_restart.py @@ -0,0 +1,55 @@ +import ydb.apps.dstool.lib.common as common +import sys + +description = 'Restart PDisk' + + +def add_options(p): + common.add_pdisk_select_options(p) + common.add_ignore_degraded_group_check_option(p) + common.add_ignore_failure_model_group_check_option(p) + common.add_ignore_vslot_quotas_option(p) + common.add_basic_format_options(p) + + +def create_request(args, pdisk): + request = common.create_bsc_request(args) + cmd = request.Command.add().RestartPDisk + + cmd.TargetPDiskId.NodeId = pdisk[0] + cmd.TargetPDiskId.PDiskId = pdisk[1] + + return request + + +def perform_request(request): + return common.invoke_bsc_request(request) + + +def is_successful_response(response): + return common.is_successful_bsc_response(response) + + +def do(args): + base_config = common.fetch_base_config() + + assert not args.dry_run, '--dry-run is not supported for this command' + + pdisks = common.get_selected_pdisks(args, base_config) + + if len(pdisks) != 1: + common.print_status(args, success=False, error_reason='Only restart one PDisk at a time') + sys.exit(1) + + success = True + error_reason = '' + + request = create_request(args, list(pdisks)[0]) + response = perform_request(request) + if not is_successful_response(response): + success = False + error_reason += 'Request has failed: \n{0}\n{1}\n'.format(request, response) + + common.print_status(args, success, error_reason) + if not success: + sys.exit(1) diff --git a/ydb/apps/dstool/lib/ya.make b/ydb/apps/dstool/lib/ya.make index f8b5255faed4..0a4c190ab243 100644 --- a/ydb/apps/dstool/lib/ya.make +++ b/ydb/apps/dstool/lib/ya.make @@ -13,7 +13,9 @@ PY_SRCS( dstool_cmd_pdisk_add_by_serial.py dstool_cmd_pdisk_list.py + dstool_cmd_pdisk_readonly.py dstool_cmd_pdisk_remove_by_serial.py + dstool_cmd_pdisk_restart.py dstool_cmd_pdisk_set.py dstool_cmd_pdisk_stop.py diff --git a/ydb/core/mind/bscontroller/cmds_box.cpp b/ydb/core/mind/bscontroller/cmds_box.cpp index 234f7bf800f1..5e5424868062 100644 --- a/ydb/core/mind/bscontroller/cmds_box.cpp +++ b/ydb/core/mind/bscontroller/cmds_box.cpp @@ -226,10 +226,54 @@ namespace NKikimr::NBsController { boxes.erase(cmd.GetOriginBoxId()); } - void TBlobStorageController::TConfigState::ExecuteStep(const NKikimrBlobStorage::TRestartPDisk& cmd, TStatus& /*status*/) { - auto targetPDiskId = cmd.GetTargetPDiskId(); + TPDiskId GetPDiskId(const TBlobStorageController::TConfigState& state, const TString& fqdn, ui32 icPort, ui32 nodeId, const TString& diskPath, ui32 pdiskId) { + ui32 targetNodeId = nodeId; + + bool foundNode = false; + + if (!nodeId) { + const TBlobStorageController::THostId key(fqdn, icPort); + if (const auto& nodeId = state.HostRecords->ResolveNodeId(key)) { + targetNodeId = *nodeId; + foundNode = true; + } + } else if (!fqdn && !icPort) { + if (const auto& hostId = state.HostRecords->GetHostId(nodeId)) { + foundNode = true; + } + } else { + // when both fqdn:port and node id were filled, we have to ensure that they match each other + foundNode = state.HostRecords->GetHostId(nodeId) == std::make_tuple(fqdn, icPort); + } + + if (!foundNode) { + throw TExHostNotFound({fqdn, icPort}) << " HostKey# " << fqdn << ":" << icPort << " incorrect"; + } + + TPDiskId res; + + if (pdiskId) { + if (diskPath) { + throw TExError() << "TUpdateDriveStatus.Path and PDiskId are mutually exclusive"; + } + res = TPDiskId(targetNodeId, pdiskId); + if (!state.PDisks.Find(res) || state.PDisksToRemove.count(res)) { + throw TExPDiskNotFound(targetNodeId, pdiskId); + } + } else { + const std::optional found = state.FindPDiskByLocation(targetNodeId, diskPath); + if (found && !state.PDisksToRemove.count(*found)) { + res = *found; + } else { + throw TExPDiskNotFound(nodeId, diskPath); + } + } + + return res; + } - TPDiskId pdiskId(targetPDiskId.GetNodeId(), targetPDiskId.GetPDiskId()); + void TBlobStorageController::TConfigState::ExecuteStep(const NKikimrBlobStorage::TRestartPDisk& cmd, TStatus& /*status*/) { + TPDiskId pdiskId = GetPDiskId(*this, cmd.GetFqdn(), cmd.GetIcPort(), cmd.GetTargetPDiskId().GetNodeId(), cmd.GetPath(), cmd.GetTargetPDiskId().GetPDiskId()); TPDiskInfo *pdisk = PDisks.FindForUpdate(pdiskId); @@ -252,9 +296,7 @@ namespace NKikimr::NBsController { } void TBlobStorageController::TConfigState::ExecuteStep(const NKikimrBlobStorage::TSetPDiskReadOnly& cmd, TStatus& /*status*/) { - auto targetPDiskId = cmd.GetTargetPDiskId(); - - TPDiskId pdiskId(targetPDiskId.GetNodeId(), targetPDiskId.GetPDiskId()); + TPDiskId pdiskId = GetPDiskId(*this, cmd.GetFqdn(), cmd.GetIcPort(), cmd.GetTargetPDiskId().GetNodeId(), cmd.GetPath(), cmd.GetTargetPDiskId().GetPDiskId()); TPDiskInfo *pdisk = PDisks.FindForUpdate(pdiskId); @@ -281,25 +323,7 @@ namespace NKikimr::NBsController { } void TBlobStorageController::TConfigState::ExecuteStep(const NKikimrBlobStorage::TStopPDisk& cmd, TStatus& /*status*/) { - const auto& host = NormalizeHostKey(cmd.GetHostKey()); - - TPDiskId pdiskId; - if (cmd.GetPDiskId()) { - if (cmd.GetPath()) { - throw TExError() << "TUpdateDriveStatus.Path and PDiskId are mutually exclusive"; - } - pdiskId = TPDiskId(host.GetNodeId(), cmd.GetPDiskId()); - if (!PDisks.Find(pdiskId) || PDisksToRemove.count(pdiskId)) { - throw TExPDiskNotFound(host, cmd.GetPDiskId(), TString()); - } - } else { - const std::optional found = FindPDiskByLocation(host.GetNodeId(), cmd.GetPath()); - if (found && !PDisksToRemove.count(*found)) { - pdiskId = *found; - } else { - throw TExPDiskNotFound(host, 0, cmd.GetPath()); - } - } + TPDiskId pdiskId = GetPDiskId(*this, cmd.GetHostKey().GetFqdn(), cmd.GetHostKey().GetIcPort(), cmd.GetHostKey().GetNodeId(), cmd.GetPath(), cmd.GetPDiskId()); TPDiskInfo *pdisk = PDisks.FindForUpdate(pdiskId); diff --git a/ydb/core/mind/bscontroller/error.h b/ydb/core/mind/bscontroller/error.h index 704f722eee16..34058ebe6a0a 100644 --- a/ydb/core/mind/bscontroller/error.h +++ b/ydb/core/mind/bscontroller/error.h @@ -126,6 +126,12 @@ namespace NKikimr::NBsController { << TErrorParams::PDiskId(pdiskId); } + TExPDiskNotFound(ui32 nodeId, TString path) { + *this << "PDisk not found" + << TErrorParams::NodeId(nodeId) + << TErrorParams::Path(path); + } + NKikimrBlobStorage::TConfigResponse::TStatus::EFailReason GetFailReason() const override { return NKikimrBlobStorage::TConfigResponse::TStatus::kPDiskNotFound; } diff --git a/ydb/core/protos/blobstorage_config.proto b/ydb/core/protos/blobstorage_config.proto index 66bc5363a260..9c281e851cf7 100644 --- a/ydb/core/protos/blobstorage_config.proto +++ b/ydb/core/protos/blobstorage_config.proto @@ -402,11 +402,17 @@ message TWipeVDisk { message TRestartPDisk { NKikimrBlobStorage.TPDiskId TargetPDiskId = 1; + string Path = 2; + string Fqdn = 3; + int32 IcPort = 4; } message TSetPDiskReadOnly { NKikimrBlobStorage.TPDiskId TargetPDiskId = 1; bool Value = 2; + string Path = 3; + string Fqdn = 4; + int32 IcPort = 5; } message TStopPDisk { From cf4cdbccca51fd2725f2c30c9e0b233d6d99d95e Mon Sep 17 00:00:00 2001 From: Semyon Danilov Date: Wed, 8 Jan 2025 21:12:43 +0100 Subject: [PATCH 2/2] .. --- ydb/apps/dstool/lib/dstool_cmd_pdisk_stop.py | 4 +- .../blobstorage/ut_blobstorage/stop_pdisk.cpp | 6 +- ydb/core/mind/bscontroller/cmds_box.cpp | 79 ++++++++----------- ydb/core/mind/bscontroller/error.h | 9 ++- ydb/core/protos/blobstorage_config.proto | 20 ++--- 5 files changed, 57 insertions(+), 61 deletions(-) diff --git a/ydb/apps/dstool/lib/dstool_cmd_pdisk_stop.py b/ydb/apps/dstool/lib/dstool_cmd_pdisk_stop.py index d4f8df67d37a..eac1043e5c9f 100644 --- a/ydb/apps/dstool/lib/dstool_cmd_pdisk_stop.py +++ b/ydb/apps/dstool/lib/dstool_cmd_pdisk_stop.py @@ -16,8 +16,8 @@ def create_request(args, pdisk): request = common.create_bsc_request(args) cmd = request.Command.add().StopPDisk - cmd.HostKey.NodeId = pdisk[0] - cmd.PDiskId = pdisk[1] + cmd.TargetPDiskId.NodeId = pdisk[0] + cmd.TargetPDiskId.PDiskId = pdisk[1] return request diff --git a/ydb/core/blobstorage/ut_blobstorage/stop_pdisk.cpp b/ydb/core/blobstorage/ut_blobstorage/stop_pdisk.cpp index a664b6fffca8..26f4a54e3710 100644 --- a/ydb/core/blobstorage/ut_blobstorage/stop_pdisk.cpp +++ b/ydb/core/blobstorage/ut_blobstorage/stop_pdisk.cpp @@ -23,9 +23,9 @@ Y_UNIT_TEST_SUITE(BSCStopPDisk) { NKikimrBlobStorage::TConfigRequest request; NKikimrBlobStorage::TStopPDisk* cmd = request.AddCommand()->MutableStopPDisk(); - auto* hostKey = cmd->MutableHostKey(); - hostKey->SetNodeId(targetNodeId); - cmd->SetPDiskId(targetPDiskId); + auto* pdiskId = cmd->MutableTargetPDiskId(); + pdiskId->SetNodeId(targetNodeId); + pdiskId->SetPDiskId(targetPDiskId); auto response = env.Invoke(request); diff --git a/ydb/core/mind/bscontroller/cmds_box.cpp b/ydb/core/mind/bscontroller/cmds_box.cpp index 5e5424868062..c28e0140adb6 100644 --- a/ydb/core/mind/bscontroller/cmds_box.cpp +++ b/ydb/core/mind/bscontroller/cmds_box.cpp @@ -226,54 +226,45 @@ namespace NKikimr::NBsController { boxes.erase(cmd.GetOriginBoxId()); } - TPDiskId GetPDiskId(const TBlobStorageController::TConfigState& state, const TString& fqdn, ui32 icPort, ui32 nodeId, const TString& diskPath, ui32 pdiskId) { - ui32 targetNodeId = nodeId; - - bool foundNode = false; - - if (!nodeId) { - const TBlobStorageController::THostId key(fqdn, icPort); - if (const auto& nodeId = state.HostRecords->ResolveNodeId(key)) { - targetNodeId = *nodeId; - foundNode = true; - } - } else if (!fqdn && !icPort) { - if (const auto& hostId = state.HostRecords->GetHostId(nodeId)) { - foundNode = true; - } - } else { - // when both fqdn:port and node id were filled, we have to ensure that they match each other - foundNode = state.HostRecords->GetHostId(nodeId) == std::make_tuple(fqdn, icPort); - } - - if (!foundNode) { - throw TExHostNotFound({fqdn, icPort}) << " HostKey# " << fqdn << ":" << icPort << " incorrect"; - } - - TPDiskId res; - - if (pdiskId) { - if (diskPath) { - throw TExError() << "TUpdateDriveStatus.Path and PDiskId are mutually exclusive"; - } - res = TPDiskId(targetNodeId, pdiskId); - if (!state.PDisks.Find(res) || state.PDisksToRemove.count(res)) { - throw TExPDiskNotFound(targetNodeId, pdiskId); + template + TPDiskId GetPDiskId(const TBlobStorageController::TConfigState& state, const T& command) { + if (command.HasTargetPDiskId()) { + const NKikimrBlobStorage::TPDiskId& pdiskId = command.GetTargetPDiskId(); + ui32 targetNodeId = pdiskId.GetNodeId(); + ui32 targetPDiskId = pdiskId.GetPDiskId(); + if (const auto& hostId = state.HostRecords->GetHostId(targetNodeId)) { + TPDiskId target(targetNodeId, targetPDiskId); + if (state.PDisks.Find(target) && !state.PDisksToRemove.count(target)) { + return target; + } + throw TExPDiskNotFound(targetNodeId, targetPDiskId); } - } else { - const std::optional found = state.FindPDiskByLocation(targetNodeId, diskPath); - if (found && !state.PDisksToRemove.count(*found)) { - res = *found; - } else { - throw TExPDiskNotFound(nodeId, diskPath); + throw TExHostNotFound(targetNodeId); + } else if (command.HasTargetPDiskLocation()) { + const NKikimrBlobStorage::TPDiskLocation& pdiskLocation = command.GetTargetPDiskLocation(); + const TString& targetFqdn = pdiskLocation.GetFqdn(); + const TString& targetDiskPath = pdiskLocation.GetPath(); + // iterate over state.HostRecords map using iterator (->begin() and ->end()) + for (auto it = state.HostRecords->begin(); it != state.HostRecords->end(); ++it) { + const auto& [hostId, hostRecord] = *it; + + TString fqdn = std::get<0>(hostId); + + if (targetFqdn == fqdn) { + ui32 targetNodeId = hostRecord.NodeId; + + if (const auto& pdiskId = state.FindPDiskByLocation(targetNodeId, targetDiskPath)) { + return *pdiskId; + } + } } + throw TExPDiskNotFound(targetFqdn, targetDiskPath); } - - return res; + throw TExError() << "Either TargetPDiskId or PDiskLocation must be specified"; } void TBlobStorageController::TConfigState::ExecuteStep(const NKikimrBlobStorage::TRestartPDisk& cmd, TStatus& /*status*/) { - TPDiskId pdiskId = GetPDiskId(*this, cmd.GetFqdn(), cmd.GetIcPort(), cmd.GetTargetPDiskId().GetNodeId(), cmd.GetPath(), cmd.GetTargetPDiskId().GetPDiskId()); + TPDiskId pdiskId = GetPDiskId(*this, cmd); TPDiskInfo *pdisk = PDisks.FindForUpdate(pdiskId); @@ -296,7 +287,7 @@ namespace NKikimr::NBsController { } void TBlobStorageController::TConfigState::ExecuteStep(const NKikimrBlobStorage::TSetPDiskReadOnly& cmd, TStatus& /*status*/) { - TPDiskId pdiskId = GetPDiskId(*this, cmd.GetFqdn(), cmd.GetIcPort(), cmd.GetTargetPDiskId().GetNodeId(), cmd.GetPath(), cmd.GetTargetPDiskId().GetPDiskId()); + TPDiskId pdiskId = GetPDiskId(*this, cmd); TPDiskInfo *pdisk = PDisks.FindForUpdate(pdiskId); @@ -323,7 +314,7 @@ namespace NKikimr::NBsController { } void TBlobStorageController::TConfigState::ExecuteStep(const NKikimrBlobStorage::TStopPDisk& cmd, TStatus& /*status*/) { - TPDiskId pdiskId = GetPDiskId(*this, cmd.GetHostKey().GetFqdn(), cmd.GetHostKey().GetIcPort(), cmd.GetHostKey().GetNodeId(), cmd.GetPath(), cmd.GetPDiskId()); + TPDiskId pdiskId = GetPDiskId(*this, cmd); TPDiskInfo *pdisk = PDisks.FindForUpdate(pdiskId); diff --git a/ydb/core/mind/bscontroller/error.h b/ydb/core/mind/bscontroller/error.h index 34058ebe6a0a..203c64ec9139 100644 --- a/ydb/core/mind/bscontroller/error.h +++ b/ydb/core/mind/bscontroller/error.h @@ -105,6 +105,11 @@ namespace NKikimr::NBsController { << TErrorParams::IcPort(std::get<1>(hostKey)); } + TExHostNotFound(ui32 nodeId) { + *this << "Host not found" + << TErrorParams::NodeId(nodeId); + } + NKikimrBlobStorage::TConfigResponse::TStatus::EFailReason GetFailReason() const override { return NKikimrBlobStorage::TConfigResponse::TStatus::kHostNotFound; } @@ -126,9 +131,9 @@ namespace NKikimr::NBsController { << TErrorParams::PDiskId(pdiskId); } - TExPDiskNotFound(ui32 nodeId, TString path) { + TExPDiskNotFound(const TString& fqdn, TString path) { *this << "PDisk not found" - << TErrorParams::NodeId(nodeId) + << TErrorParams::Fqdn(fqdn) << TErrorParams::Path(path); } diff --git a/ydb/core/protos/blobstorage_config.proto b/ydb/core/protos/blobstorage_config.proto index 9c281e851cf7..db205107032b 100644 --- a/ydb/core/protos/blobstorage_config.proto +++ b/ydb/core/protos/blobstorage_config.proto @@ -400,25 +400,25 @@ message TWipeVDisk { NKikimrBlobStorage.TVDiskID VDiskId = 2; } +message TPDiskLocation { + string Fqdn = 1; // fully qualified domain name of the host + string Path = 2; // absolute path to the device as enlisted in PDisk configuration +} + message TRestartPDisk { NKikimrBlobStorage.TPDiskId TargetPDiskId = 1; - string Path = 2; - string Fqdn = 3; - int32 IcPort = 4; + TPDiskLocation TargetPDiskLocation = 2; } message TSetPDiskReadOnly { NKikimrBlobStorage.TPDiskId TargetPDiskId = 1; - bool Value = 2; - string Path = 3; - string Fqdn = 4; - int32 IcPort = 5; + TPDiskLocation TargetPDiskLocation = 2; + bool Value = 3; } message TStopPDisk { - THostKey HostKey = 1; // host on which we are looking for the drive - string Path = 2; // absolute path to the device as enlisted in PDisk configuration - uint32 PDiskId = 3; // may be set instead of path to identify PDisk + NKikimrBlobStorage.TPDiskId TargetPDiskId = 1; + TPDiskLocation TargetPDiskLocation = 2; } message TSetScrubPeriodicity {