Skip to content

Commit

Permalink
PDisk restart and readonly commands with FQDN and disk's path (#13196)
Browse files Browse the repository at this point in the history
  • Loading branch information
SammyVimes authored Jan 14, 2025
1 parent ecc265c commit 5316ae5
Show file tree
Hide file tree
Showing 10 changed files with 202 additions and 38 deletions.
6 changes: 4 additions & 2 deletions ydb/apps/dstool/lib/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import ydb.apps.dstool.lib.dstool_cmd_pdisk_set as pdisk_set
import ydb.apps.dstool.lib.dstool_cmd_pdisk_list as pdisk_list
import ydb.apps.dstool.lib.dstool_cmd_pdisk_stop as pdisk_stop
import ydb.apps.dstool.lib.dstool_cmd_pdisk_restart as pdisk_restart
import ydb.apps.dstool.lib.dstool_cmd_pdisk_readonly as pdisk_readonly

import ydb.apps.dstool.lib.dstool_cmd_vdisk_evict as vdisk_evict
import ydb.apps.dstool.lib.dstool_cmd_vdisk_list as vdisk_list
Expand Down Expand Up @@ -49,14 +51,14 @@
pool_list, pool_create_virtual,
group_check, group_decommit, group_show_blob_info, group_show_storage_efficiency, group_show_usage_by_tablets,
group_state, group_take_snapshot, group_add, group_list, group_virtual_create, group_virtual_cancel,
pdisk_add_by_serial, pdisk_remove_by_serial, pdisk_set, pdisk_list, pdisk_stop,
pdisk_add_by_serial, pdisk_remove_by_serial, pdisk_set, pdisk_list, pdisk_stop, pdisk_restart, pdisk_readonly,
vdisk_evict, vdisk_list, vdisk_set_read_only, vdisk_remove_donor, vdisk_wipe,
device_list,
]

default_structure = [
('device', ['list']),
('pdisk', ['add-by-serial', 'remove-by-serial', 'set', 'list', 'stop']),
('pdisk', ['add-by-serial', 'remove-by-serial', 'set', 'list', 'stop', 'restart', 'readonly']),
('vdisk', ['evict', 'list', 'set-read-only', 'remove-donor', 'wipe']),
('group', ['add', 'check', 'decommit', ('show', ['blob-info', 'storage-efficiency', 'usage-by-tablets']), 'state', 'take-snapshot', 'list', ('virtual', ['create', 'cancel'])]),
('pool', ['list', ('create', ['virtual'])]),
Expand Down
58 changes: 58 additions & 0 deletions ydb/apps/dstool/lib/dstool_cmd_pdisk_readonly.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import ydb.apps.dstool.lib.common as common
import sys

description = 'Change PDisk read-only status'


def add_options(p):
    """Register the options of the PDisk read-only command on parser ``p``.

    The shared option groups provided by ``common`` are added first, in a
    fixed order, followed by the command-specific ``--enabled`` switch.
    """
    shared_option_adders = (
        common.add_pdisk_select_options,
        common.add_ignore_degraded_group_check_option,
        common.add_ignore_failure_model_group_check_option,
        common.add_ignore_vslot_quotas_option,
        common.add_basic_format_options,
    )
    for add_shared_options in shared_option_adders:
        add_shared_options(p)

    # NOTE(review): --enabled is neither required nor given a default, so
    # args.enabled is None when the flag is omitted; create_request compares
    # it with 'true', which then yields False. Confirm this is intended.
    p.add_argument(
        '--enabled',
        type=str,
        choices=['true', 'false'],
        help='Enable read-only mode',
    )


def create_request(args, pdisk):
    """Build the config request that sets the read-only flag of one PDisk.

    ``pdisk`` is indexed as ``pdisk[0]`` (node id) and ``pdisk[1]`` (PDisk id).
    The flag value is true only when ``args.enabled`` equals the string
    ``'true'``.
    """
    request = common.create_bsc_request(args)
    set_read_only = request.Command.add().SetPDiskReadOnly

    target = set_read_only.TargetPDiskId
    target.NodeId = pdisk[0]
    target.PDiskId = pdisk[1]

    read_only = args.enabled == 'true'
    set_read_only.Value = read_only

    return request


def perform_request(request):
    """Pass ``request`` to ``common.invoke_bsc_request`` and return its result."""
    response = common.invoke_bsc_request(request)
    return response


def is_successful_response(response):
    """Return what ``common.is_successful_bsc_response`` reports for ``response``."""
    verdict = common.is_successful_bsc_response(response)
    return verdict


def do(args):
    """Apply the requested read-only status to the single selected PDisk.

    The selection options must match exactly one PDisk. The outcome is
    reported through ``common.print_status``; the process exits with status 1
    when the selection does not contain exactly one PDisk or when the request
    is not successful.

    Raises:
        AssertionError: if ``args.dry_run`` is set.
    """
    # Raised explicitly instead of via ``assert`` so the guard is not stripped
    # under ``python -O``, and checked first so an unsupported invocation does
    # not pay for fetching the base config.
    if args.dry_run:
        raise AssertionError('--dry-run is not supported for this command')

    base_config = common.fetch_base_config()
    pdisks = common.get_selected_pdisks(args, base_config)

    if len(pdisks) != 1:
        common.print_status(args, success=False, error_reason='Only change one PDisk read-only status at a time')
        sys.exit(1)

    # Exactly one element is present, so take it without copying the collection.
    request = create_request(args, next(iter(pdisks)))
    response = perform_request(request)

    success = bool(is_successful_response(response))
    error_reason = ''
    if not success:
        error_reason = 'Request has failed: \n{0}\n{1}\n'.format(request, response)

    common.print_status(args, success, error_reason)
    if not success:
        sys.exit(1)
55 changes: 55 additions & 0 deletions ydb/apps/dstool/lib/dstool_cmd_pdisk_restart.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import ydb.apps.dstool.lib.common as common
import sys

description = 'Restart PDisk'


def add_options(p):
    """Register the options of the PDisk restart command on parser ``p``.

    Only the shared option groups provided by ``common`` are added, in a
    fixed order; the command has no options of its own.
    """
    for register in (
        common.add_pdisk_select_options,
        common.add_ignore_degraded_group_check_option,
        common.add_ignore_failure_model_group_check_option,
        common.add_ignore_vslot_quotas_option,
        common.add_basic_format_options,
    ):
        register(p)


def create_request(args, pdisk):
    """Build the config request that restarts one PDisk.

    ``pdisk`` is indexed as ``pdisk[0]`` (node id) and ``pdisk[1]`` (PDisk id).
    """
    request = common.create_bsc_request(args)

    target = request.Command.add().RestartPDisk.TargetPDiskId
    target.NodeId = pdisk[0]
    target.PDiskId = pdisk[1]

    return request


def perform_request(request):
    """Pass ``request`` to ``common.invoke_bsc_request`` and return its result."""
    response = common.invoke_bsc_request(request)
    return response


def is_successful_response(response):
    """Return what ``common.is_successful_bsc_response`` reports for ``response``."""
    verdict = common.is_successful_bsc_response(response)
    return verdict


def do(args):
    """Restart the single selected PDisk.

    The selection options must match exactly one PDisk. The outcome is
    reported through ``common.print_status``; the process exits with status 1
    when the selection does not contain exactly one PDisk or when the request
    is not successful.

    Raises:
        AssertionError: if ``args.dry_run`` is set.
    """
    # Raised explicitly instead of via ``assert`` so the guard is not stripped
    # under ``python -O``, and checked first so an unsupported invocation does
    # not pay for fetching the base config.
    if args.dry_run:
        raise AssertionError('--dry-run is not supported for this command')

    base_config = common.fetch_base_config()
    pdisks = common.get_selected_pdisks(args, base_config)

    if len(pdisks) != 1:
        common.print_status(args, success=False, error_reason='Only restart one PDisk at a time')
        sys.exit(1)

    # Exactly one element is present, so take it without copying the collection.
    request = create_request(args, next(iter(pdisks)))
    response = perform_request(request)

    success = bool(is_successful_response(response))
    error_reason = ''
    if not success:
        error_reason = 'Request has failed: \n{0}\n{1}\n'.format(request, response)

    common.print_status(args, success, error_reason)
    if not success:
        sys.exit(1)
4 changes: 2 additions & 2 deletions ydb/apps/dstool/lib/dstool_cmd_pdisk_stop.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ def create_request(args, pdisk):
request = common.create_bsc_request(args)
cmd = request.Command.add().StopPDisk

cmd.HostKey.NodeId = pdisk[0]
cmd.PDiskId = pdisk[1]
cmd.TargetPDiskId.NodeId = pdisk[0]
cmd.TargetPDiskId.PDiskId = pdisk[1]

return request

Expand Down
2 changes: 2 additions & 0 deletions ydb/apps/dstool/lib/ya.make
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@ PY_SRCS(

dstool_cmd_pdisk_add_by_serial.py
dstool_cmd_pdisk_list.py
dstool_cmd_pdisk_readonly.py
dstool_cmd_pdisk_remove_by_serial.py
dstool_cmd_pdisk_restart.py
dstool_cmd_pdisk_set.py
dstool_cmd_pdisk_stop.py

Expand Down
6 changes: 3 additions & 3 deletions ydb/core/blobstorage/ut_blobstorage/stop_pdisk.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ Y_UNIT_TEST_SUITE(BSCStopPDisk) {
NKikimrBlobStorage::TConfigRequest request;

NKikimrBlobStorage::TStopPDisk* cmd = request.AddCommand()->MutableStopPDisk();
auto* hostKey = cmd->MutableHostKey();
hostKey->SetNodeId(targetNodeId);
cmd->SetPDiskId(targetPDiskId);
auto* pdiskId = cmd->MutableTargetPDiskId();
pdiskId->SetNodeId(targetNodeId);
pdiskId->SetPDiskId(targetPDiskId);

auto response = env.Invoke(request);

Expand Down
65 changes: 40 additions & 25 deletions ydb/core/mind/bscontroller/cmds_box.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -226,10 +226,45 @@ namespace NKikimr::NBsController {
boxes.erase(cmd.GetOriginBoxId());
}

void TBlobStorageController::TConfigState::ExecuteStep(const NKikimrBlobStorage::TRestartPDisk& cmd, TStatus& /*status*/) {
auto targetPDiskId = cmd.GetTargetPDiskId();
// Resolves the PDisk addressed by a command that carries either
// TargetPDiskId (node id + PDisk id) or TargetPDiskLocation (host FQDN +
// device path) and returns its TPDiskId.
//
// Throws:
//   TExError         - both or neither of the two selectors are set
//   TExHostNotFound  - TargetPDiskId names a node with no host record
//   TExPDiskNotFound - no matching PDisk exists, or the match is listed in
//                      PDisksToRemove
template <class T>
TPDiskId GetPDiskId(const TBlobStorageController::TConfigState& state, const T& command) {
    if (command.HasTargetPDiskId() && command.HasTargetPDiskLocation()) {
        throw TExError() << "Only one of TargetPDiskId or PDiskLocation can be specified";
    }

    if (command.HasTargetPDiskId()) {
        const NKikimrBlobStorage::TPDiskId& pdiskId = command.GetTargetPDiskId();
        ui32 targetNodeId = pdiskId.GetNodeId();
        ui32 targetPDiskId = pdiskId.GetPDiskId();
        if (const auto& hostId = state.HostRecords->GetHostId(targetNodeId)) {
            TPDiskId target(targetNodeId, targetPDiskId);
            if (state.PDisks.Find(target) && !state.PDisksToRemove.count(target)) {
                return target;
            }
            throw TExPDiskNotFound(targetNodeId, targetPDiskId);
        }
        throw TExHostNotFound(targetNodeId);
    } else if (command.HasTargetPDiskLocation()) {
        const NKikimrBlobStorage::TPDiskLocation& pdiskLocation = command.GetTargetPDiskLocation();
        const TString& targetFqdn = pdiskLocation.GetFqdn();
        const TString& targetDiskPath = pdiskLocation.GetPath();

        // Every node id registered under the FQDN is tried in turn.
        auto range = state.HostRecords->ResolveNodeId(targetFqdn);

        for (auto it = range.first; it != range.second; ++it) {
            const TNodeId nodeId = it->second;
            const std::optional<TPDiskId> found = state.FindPDiskByLocation(nodeId, targetDiskPath);
            // A PDisk listed in PDisksToRemove is treated as absent, the same
            // way the TargetPDiskId branch above treats it.
            if (found && !state.PDisksToRemove.count(*found)) {
                return *found;
            }
        }

        throw TExPDiskNotFound(targetFqdn, targetDiskPath);
    }
    throw TExError() << "Either TargetPDiskId or PDiskLocation must be specified";
}

void TBlobStorageController::TConfigState::ExecuteStep(const NKikimrBlobStorage::TRestartPDisk& cmd, TStatus& /*status*/) {
TPDiskId pdiskId = GetPDiskId(*this, cmd);

TPDiskInfo *pdisk = PDisks.FindForUpdate(pdiskId);

Expand All @@ -252,9 +287,7 @@ namespace NKikimr::NBsController {
}

void TBlobStorageController::TConfigState::ExecuteStep(const NKikimrBlobStorage::TSetPDiskReadOnly& cmd, TStatus& /*status*/) {
auto targetPDiskId = cmd.GetTargetPDiskId();

TPDiskId pdiskId(targetPDiskId.GetNodeId(), targetPDiskId.GetPDiskId());
TPDiskId pdiskId = GetPDiskId(*this, cmd);

TPDiskInfo *pdisk = PDisks.FindForUpdate(pdiskId);

Expand All @@ -281,25 +314,7 @@ namespace NKikimr::NBsController {
}

void TBlobStorageController::TConfigState::ExecuteStep(const NKikimrBlobStorage::TStopPDisk& cmd, TStatus& /*status*/) {
const auto& host = NormalizeHostKey(cmd.GetHostKey());

TPDiskId pdiskId;
if (cmd.GetPDiskId()) {
if (cmd.GetPath()) {
throw TExError() << "TUpdateDriveStatus.Path and PDiskId are mutually exclusive";
}
pdiskId = TPDiskId(host.GetNodeId(), cmd.GetPDiskId());
if (!PDisks.Find(pdiskId) || PDisksToRemove.count(pdiskId)) {
throw TExPDiskNotFound(host, cmd.GetPDiskId(), TString());
}
} else {
const std::optional<TPDiskId> found = FindPDiskByLocation(host.GetNodeId(), cmd.GetPath());
if (found && !PDisksToRemove.count(*found)) {
pdiskId = *found;
} else {
throw TExPDiskNotFound(host, 0, cmd.GetPath());
}
}
TPDiskId pdiskId = GetPDiskId(*this, cmd);

TPDiskInfo *pdisk = PDisks.FindForUpdate(pdiskId);

Expand Down
11 changes: 11 additions & 0 deletions ydb/core/mind/bscontroller/error.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,11 @@ namespace NKikimr::NBsController {
<< TErrorParams::IcPort(std::get<1>(hostKey));
}

// Builds the "Host not found" error for a host addressed by node id;
// the node id is attached to the error as a TErrorParams::NodeId parameter.
TExHostNotFound(ui32 nodeId) {
*this << "Host not found"
<< TErrorParams::NodeId(nodeId);
}

NKikimrBlobStorage::TConfigResponse::TStatus::EFailReason GetFailReason() const override {
return NKikimrBlobStorage::TConfigResponse::TStatus::kHostNotFound;
}
Expand All @@ -126,6 +131,12 @@ namespace NKikimr::NBsController {
<< TErrorParams::PDiskId(pdiskId);
}

// Builds the "PDisk not found" error for a PDisk addressed by host FQDN and
// device path; both values are attached to the error as parameters.
// Both strings are taken by const reference: they are only read here.
TExPDiskNotFound(const TString& fqdn, const TString& path) {
    *this << "PDisk not found"
          << TErrorParams::Fqdn(fqdn)
          << TErrorParams::Path(path);
}

NKikimrBlobStorage::TConfigResponse::TStatus::EFailReason GetFailReason() const override {
return NKikimrBlobStorage::TConfigResponse::TStatus::kPDiskNotFound;
}
Expand Down
9 changes: 9 additions & 0 deletions ydb/core/mind/bscontroller/impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
#include "self_heal.h"
#include "storage_pool_stat.h"

#include <util/generic/hash_multi_map.h>

inline IOutputStream& operator <<(IOutputStream& o, NKikimr::TErasureType::EErasureSpecies p) {
return o << NKikimr::TErasureType::ErasureSpeciesName(p);
}
Expand Down Expand Up @@ -1432,6 +1434,7 @@ class TBlobStorageController : public TActor<TBlobStorageController>, public TTa
class THostRecordMapImpl {
THashMap<THostId, THostRecord> HostIdToRecord;
THashMap<TNodeId, THostId> NodeIdToHostId;
THashMultiMap<TString, TNodeId> FqdnToNodeId;

public:
THostRecordMapImpl() = default;
Expand All @@ -1441,6 +1444,7 @@ class TBlobStorageController : public TActor<TBlobStorageController>, public TTa
const THostId hostId(nodeInfo.Host, nodeInfo.Port);
NodeIdToHostId.emplace(nodeInfo.NodeId, hostId);
HostIdToRecord.emplace(hostId, nodeInfo);
FqdnToNodeId.emplace(nodeInfo.Host, nodeInfo.NodeId);
}
}

Expand All @@ -1450,6 +1454,7 @@ class TBlobStorageController : public TActor<TBlobStorageController>, public TTa
const TNodeId nodeId = item.GetNodeId();
NodeIdToHostId.emplace(nodeId, hostId);
HostIdToRecord.emplace(hostId, item);
FqdnToNodeId.emplace(item.GetHost(), nodeId);
}
}

Expand Down Expand Up @@ -1482,6 +1487,10 @@ class TBlobStorageController : public TActor<TBlobStorageController>, public TTa
}
}

// Looks up the FqdnToNodeId entries stored under the given FQDN and returns
// the equal_range result (a first/second iterator pair over those entries).
auto ResolveNodeId(const TString& fqdn) const {
    auto matches = FqdnToNodeId.equal_range(fqdn);
    return matches;
}

auto begin() const {
return HostIdToRecord.begin();
}
Expand Down
24 changes: 18 additions & 6 deletions ydb/core/protos/blobstorage_config.proto
Original file line number Diff line number Diff line change
Expand Up @@ -400,19 +400,31 @@ message TWipeVDisk {
NKikimrBlobStorage.TVDiskID VDiskId = 2;
}

// Addresses a PDisk by the host it is attached to and the path of its device.
message TPDiskLocation {
string Fqdn = 1; // fully qualified domain name of the host
string Path = 2; // absolute path to the device as enlisted in PDisk configuration
}

message TRestartPDisk {
NKikimrBlobStorage.TPDiskId TargetPDiskId = 1;
oneof PDisk {
NKikimrBlobStorage.TPDiskId TargetPDiskId = 1;
TPDiskLocation TargetPDiskLocation = 2;
}
}

message TSetPDiskReadOnly {
NKikimrBlobStorage.TPDiskId TargetPDiskId = 1;
bool Value = 2;
oneof PDisk {
NKikimrBlobStorage.TPDiskId TargetPDiskId = 1;
TPDiskLocation TargetPDiskLocation = 2;
}
bool Value = 3;
}

message TStopPDisk {
THostKey HostKey = 1; // host on which we are looking for the drive
string Path = 2; // absolute path to the device as enlisted in PDisk configuration
uint32 PDiskId = 3; // may be set instead of path to identify PDisk
oneof PDisk {
NKikimrBlobStorage.TPDiskId TargetPDiskId = 1;
TPDiskLocation TargetPDiskLocation = 2;
}
}

message TSetScrubPeriodicity {
Expand Down

0 comments on commit 5316ae5

Please sign in to comment.