Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PDisk restart and readonly commands with FQDN and disk's path #13196

Merged
merged 4 commits into from
Jan 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions ydb/apps/dstool/lib/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import ydb.apps.dstool.lib.dstool_cmd_pdisk_set as pdisk_set
import ydb.apps.dstool.lib.dstool_cmd_pdisk_list as pdisk_list
import ydb.apps.dstool.lib.dstool_cmd_pdisk_stop as pdisk_stop
import ydb.apps.dstool.lib.dstool_cmd_pdisk_restart as pdisk_restart
import ydb.apps.dstool.lib.dstool_cmd_pdisk_readonly as pdisk_readonly

import ydb.apps.dstool.lib.dstool_cmd_vdisk_evict as vdisk_evict
import ydb.apps.dstool.lib.dstool_cmd_vdisk_list as vdisk_list
Expand Down Expand Up @@ -49,14 +51,14 @@
pool_list, pool_create_virtual,
group_check, group_decommit, group_show_blob_info, group_show_storage_efficiency, group_show_usage_by_tablets,
group_state, group_take_snapshot, group_add, group_list, group_virtual_create, group_virtual_cancel,
pdisk_add_by_serial, pdisk_remove_by_serial, pdisk_set, pdisk_list, pdisk_stop,
pdisk_add_by_serial, pdisk_remove_by_serial, pdisk_set, pdisk_list, pdisk_stop, pdisk_restart, pdisk_readonly,
vdisk_evict, vdisk_list, vdisk_set_read_only, vdisk_remove_donor, vdisk_wipe,
device_list,
]

default_structure = [
('device', ['list']),
('pdisk', ['add-by-serial', 'remove-by-serial', 'set', 'list', 'stop']),
('pdisk', ['add-by-serial', 'remove-by-serial', 'set', 'list', 'stop', 'restart', 'readonly']),
('vdisk', ['evict', 'list', 'set-read-only', 'remove-donor', 'wipe']),
('group', ['add', 'check', 'decommit', ('show', ['blob-info', 'storage-efficiency', 'usage-by-tablets']), 'state', 'take-snapshot', 'list', ('virtual', ['create', 'cancel'])]),
('pool', ['list', ('create', ['virtual'])]),
Expand Down
58 changes: 58 additions & 0 deletions ydb/apps/dstool/lib/dstool_cmd_pdisk_readonly.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import ydb.apps.dstool.lib.common as common
import sys

description = 'Change PDisk read-only status'


def add_options(p):
    """Register the command-line options of the ``pdisk readonly`` command.

    p: parser object exposing ``add_argument``; it is also handed to the shared
       option helpers from ``common`` (PDisk selection, group-check overrides,
       vslot-quota override and basic output formatting).
    """
    common.add_pdisk_select_options(p)
    common.add_ignore_degraded_group_check_option(p)
    common.add_ignore_failure_model_group_check_option(p)
    common.add_ignore_vslot_quotas_option(p)
    common.add_basic_format_options(p)
    # --enabled is mandatory: create_request() evaluates `args.enabled == 'true'`,
    # so an omitted option would be sent as Value=False and silently switch
    # read-only mode off instead of reporting a usage error.
    p.add_argument('--enabled', type=str, choices=['true', 'false'], required=True, help='Enable read-only mode')


def create_request(args, pdisk):
    """Build the BSC request that sets the read-only flag of one PDisk.

    args: parsed command-line arguments; ``args.enabled`` is compared with 'true'.
    pdisk: indexable PDisk identifier; item 0 is written to NodeId and item 1
           to PDiskId of the command's TargetPDiskId.
    Returns the request obtained from ``common.create_bsc_request`` with a
    single SetPDiskReadOnly command appended.
    """
    bsc_request = common.create_bsc_request(args)
    set_read_only = bsc_request.Command.add().SetPDiskReadOnly

    target = set_read_only.TargetPDiskId
    target.NodeId = pdisk[0]
    target.PDiskId = pdisk[1]

    # Any value other than the literal 'true' (including None) results in False.
    set_read_only.Value = (args.enabled == 'true')

    return bsc_request


def perform_request(request):
    """Pass ``request`` to ``common.invoke_bsc_request`` and return its result."""
    response = common.invoke_bsc_request(request)
    return response


def is_successful_response(response):
    """Return what ``common.is_successful_bsc_response`` reports for ``response``."""
    verdict = common.is_successful_bsc_response(response)
    return verdict


def do(args):
    """Change the read-only status of exactly one selected PDisk.

    args: parsed command-line arguments (``dry_run``, the PDisk selection
          options and ``enabled`` are read here or by the helpers called).

    Prints the outcome through ``common.print_status`` and terminates the
    process with exit code 1 when --dry-run was requested, when the selection
    does not contain exactly one PDisk, or when the request is not successful.
    """
    # Reject --dry-run with an explicit check instead of `assert`: assertions
    # are stripped under `python -O`, which would turn a requested dry run
    # into a real modification. Checked first so nothing is fetched for a
    # call that is going to be refused anyway.
    if args.dry_run:
        common.print_status(args, success=False, error_reason='--dry-run is not supported for this command')
        sys.exit(1)

    base_config = common.fetch_base_config()
    pdisks = common.get_selected_pdisks(args, base_config)

    if len(pdisks) != 1:
        common.print_status(args, success=False, error_reason='Only change one PDisk read-only status at a time')
        sys.exit(1)

    success = True
    error_reason = ''

    # Exactly one element is present, so take it without building a list.
    request = create_request(args, next(iter(pdisks)))
    response = perform_request(request)
    if not is_successful_response(response):
        success = False
        error_reason = 'Request has failed: \n{0}\n{1}\n'.format(request, response)

    common.print_status(args, success, error_reason)
    if not success:
        sys.exit(1)
55 changes: 55 additions & 0 deletions ydb/apps/dstool/lib/dstool_cmd_pdisk_restart.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import ydb.apps.dstool.lib.common as common
import sys

description = 'Restart PDisk'


def add_options(p):
    """Register the command-line options of the ``pdisk restart`` command.

    p: parser object handed, in this order, to the shared option helpers from
       ``common`` (PDisk selection, group-check overrides, vslot-quota
       override and basic output formatting).
    """
    option_helpers = (
        common.add_pdisk_select_options,
        common.add_ignore_degraded_group_check_option,
        common.add_ignore_failure_model_group_check_option,
        common.add_ignore_vslot_quotas_option,
        common.add_basic_format_options,
    )
    for register in option_helpers:
        register(p)


def create_request(args, pdisk):
    """Build the BSC request that restarts one PDisk.

    args: parsed command-line arguments, forwarded to ``common.create_bsc_request``.
    pdisk: indexable PDisk identifier; item 0 is written to NodeId and item 1
           to PDiskId of the command's TargetPDiskId.
    Returns the request with a single RestartPDisk command appended.
    """
    bsc_request = common.create_bsc_request(args)

    target = bsc_request.Command.add().RestartPDisk.TargetPDiskId
    target.NodeId = pdisk[0]
    target.PDiskId = pdisk[1]

    return bsc_request


def perform_request(request):
    """Pass ``request`` to ``common.invoke_bsc_request`` and return its result."""
    response = common.invoke_bsc_request(request)
    return response


def is_successful_response(response):
    """Return what ``common.is_successful_bsc_response`` reports for ``response``."""
    verdict = common.is_successful_bsc_response(response)
    return verdict


def do(args):
    """Restart exactly one selected PDisk.

    args: parsed command-line arguments (``dry_run`` and the PDisk selection
          options are read here or by the helpers called).

    Prints the outcome through ``common.print_status`` and terminates the
    process with exit code 1 when --dry-run was requested, when the selection
    does not contain exactly one PDisk, or when the request is not successful.
    """
    # Reject --dry-run with an explicit check instead of `assert`: assertions
    # are stripped under `python -O`, which would turn a requested dry run
    # into a real PDisk restart. Checked first so nothing is fetched for a
    # call that is going to be refused anyway.
    if args.dry_run:
        common.print_status(args, success=False, error_reason='--dry-run is not supported for this command')
        sys.exit(1)

    base_config = common.fetch_base_config()
    pdisks = common.get_selected_pdisks(args, base_config)

    if len(pdisks) != 1:
        common.print_status(args, success=False, error_reason='Only restart one PDisk at a time')
        sys.exit(1)

    success = True
    error_reason = ''

    # Exactly one element is present, so take it without building a list.
    request = create_request(args, next(iter(pdisks)))
    response = perform_request(request)
    if not is_successful_response(response):
        success = False
        error_reason = 'Request has failed: \n{0}\n{1}\n'.format(request, response)

    common.print_status(args, success, error_reason)
    if not success:
        sys.exit(1)
4 changes: 2 additions & 2 deletions ydb/apps/dstool/lib/dstool_cmd_pdisk_stop.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ def create_request(args, pdisk):
request = common.create_bsc_request(args)
cmd = request.Command.add().StopPDisk

cmd.HostKey.NodeId = pdisk[0]
cmd.PDiskId = pdisk[1]
cmd.TargetPDiskId.NodeId = pdisk[0]
cmd.TargetPDiskId.PDiskId = pdisk[1]

return request

Expand Down
2 changes: 2 additions & 0 deletions ydb/apps/dstool/lib/ya.make
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@ PY_SRCS(

dstool_cmd_pdisk_add_by_serial.py
dstool_cmd_pdisk_list.py
dstool_cmd_pdisk_readonly.py
dstool_cmd_pdisk_remove_by_serial.py
dstool_cmd_pdisk_restart.py
dstool_cmd_pdisk_set.py
dstool_cmd_pdisk_stop.py

Expand Down
6 changes: 3 additions & 3 deletions ydb/core/blobstorage/ut_blobstorage/stop_pdisk.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ Y_UNIT_TEST_SUITE(BSCStopPDisk) {
NKikimrBlobStorage::TConfigRequest request;

NKikimrBlobStorage::TStopPDisk* cmd = request.AddCommand()->MutableStopPDisk();
auto* hostKey = cmd->MutableHostKey();
hostKey->SetNodeId(targetNodeId);
cmd->SetPDiskId(targetPDiskId);
auto* pdiskId = cmd->MutableTargetPDiskId();
pdiskId->SetNodeId(targetNodeId);
pdiskId->SetPDiskId(targetPDiskId);

auto response = env.Invoke(request);

Expand Down
65 changes: 40 additions & 25 deletions ydb/core/mind/bscontroller/cmds_box.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -226,10 +226,45 @@ namespace NKikimr::NBsController {
boxes.erase(cmd.GetOriginBoxId());
}

void TBlobStorageController::TConfigState::ExecuteStep(const NKikimrBlobStorage::TRestartPDisk& cmd, TStatus& /*status*/) {
auto targetPDiskId = cmd.GetTargetPDiskId();
// Resolves the PDisk addressed by a command that carries either TargetPDiskId
// (node id + PDisk id) or TargetPDiskLocation (host FQDN + device path).
//
// Returns the id of a PDisk that is present in state.PDisks and is not listed
// in state.PDisksToRemove.
// Throws TExHostNotFound when the node id has no host record, TExPDiskNotFound
// when no matching PDisk exists, and TExError when neither (or both) of the
// target fields is set.
template <class T>
TPDiskId GetPDiskId(const TBlobStorageController::TConfigState& state, const T& command) {
    if (command.HasTargetPDiskId() && command.HasTargetPDiskLocation()) {
        throw TExError() << "Only one of TargetPDiskId or PDiskLocation can be specified";
    }

    if (command.HasTargetPDiskId()) {
        const NKikimrBlobStorage::TPDiskId& pdiskId = command.GetTargetPDiskId();
        const ui32 targetNodeId = pdiskId.GetNodeId();
        const ui32 targetPDiskId = pdiskId.GetPDiskId();

        // Only the existence of the host record matters here, its value is not used.
        if (!state.HostRecords->GetHostId(targetNodeId)) {
            throw TExHostNotFound(targetNodeId);
        }

        const TPDiskId target(targetNodeId, targetPDiskId);
        if (!state.PDisks.Find(target) || state.PDisksToRemove.count(target)) {
            throw TExPDiskNotFound(targetNodeId, targetPDiskId);
        }
        return target;
    }

    if (command.HasTargetPDiskLocation()) {
        const NKikimrBlobStorage::TPDiskLocation& pdiskLocation = command.GetTargetPDiskLocation();
        const TString& targetFqdn = pdiskLocation.GetFqdn();
        const TString& targetDiskPath = pdiskLocation.GetPath();

        // Several node ids may be registered under the same FQDN; probe each of them.
        auto range = state.HostRecords->ResolveNodeId(targetFqdn);

        for (auto it = range.first; it != range.second; ++it) {
            const TNodeId nodeId = it->second;
            const auto found = state.FindPDiskByLocation(nodeId, targetDiskPath);
            // A PDisk scheduled for removal is treated as absent, exactly as in
            // the lookup by id above.
            if (found && !state.PDisksToRemove.count(*found)) {
                return *found;
            }
        }

        throw TExPDiskNotFound(targetFqdn, targetDiskPath);
    }

    throw TExError() << "Either TargetPDiskId or PDiskLocation must be specified";
}

void TBlobStorageController::TConfigState::ExecuteStep(const NKikimrBlobStorage::TRestartPDisk& cmd, TStatus& /*status*/) {
TPDiskId pdiskId = GetPDiskId(*this, cmd);

TPDiskInfo *pdisk = PDisks.FindForUpdate(pdiskId);

Expand All @@ -252,9 +287,7 @@ namespace NKikimr::NBsController {
}

void TBlobStorageController::TConfigState::ExecuteStep(const NKikimrBlobStorage::TSetPDiskReadOnly& cmd, TStatus& /*status*/) {
auto targetPDiskId = cmd.GetTargetPDiskId();

TPDiskId pdiskId(targetPDiskId.GetNodeId(), targetPDiskId.GetPDiskId());
TPDiskId pdiskId = GetPDiskId(*this, cmd);

TPDiskInfo *pdisk = PDisks.FindForUpdate(pdiskId);

Expand All @@ -281,25 +314,7 @@ namespace NKikimr::NBsController {
}

void TBlobStorageController::TConfigState::ExecuteStep(const NKikimrBlobStorage::TStopPDisk& cmd, TStatus& /*status*/) {
const auto& host = NormalizeHostKey(cmd.GetHostKey());

TPDiskId pdiskId;
if (cmd.GetPDiskId()) {
if (cmd.GetPath()) {
throw TExError() << "TUpdateDriveStatus.Path and PDiskId are mutually exclusive";
}
pdiskId = TPDiskId(host.GetNodeId(), cmd.GetPDiskId());
if (!PDisks.Find(pdiskId) || PDisksToRemove.count(pdiskId)) {
throw TExPDiskNotFound(host, cmd.GetPDiskId(), TString());
}
} else {
const std::optional<TPDiskId> found = FindPDiskByLocation(host.GetNodeId(), cmd.GetPath());
if (found && !PDisksToRemove.count(*found)) {
pdiskId = *found;
} else {
throw TExPDiskNotFound(host, 0, cmd.GetPath());
}
}
TPDiskId pdiskId = GetPDiskId(*this, cmd);

TPDiskInfo *pdisk = PDisks.FindForUpdate(pdiskId);

Expand Down
11 changes: 11 additions & 0 deletions ydb/core/mind/bscontroller/error.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,11 @@ namespace NKikimr::NBsController {
<< TErrorParams::IcPort(std::get<1>(hostKey));
}

// Builds the "Host not found" error for a host that is known only by its
// node id; the id is attached to the error as the NodeId parameter.
TExHostNotFound(ui32 nodeId) {
*this << "Host not found"
<< TErrorParams::NodeId(nodeId);
}

NKikimrBlobStorage::TConfigResponse::TStatus::EFailReason GetFailReason() const override {
return NKikimrBlobStorage::TConfigResponse::TStatus::kHostNotFound;
}
Expand All @@ -126,6 +131,12 @@ namespace NKikimr::NBsController {
<< TErrorParams::PDiskId(pdiskId);
}

// Builds the "PDisk not found" error for a lookup by location; the host FQDN
// and the device path are attached as the Fqdn and Path parameters.
// Both strings are taken by const reference, so neither is copied for the call.
TExPDiskNotFound(const TString& fqdn, const TString& path) {
    *this << "PDisk not found"
          << TErrorParams::Fqdn(fqdn)
          << TErrorParams::Path(path);
}

NKikimrBlobStorage::TConfigResponse::TStatus::EFailReason GetFailReason() const override {
return NKikimrBlobStorage::TConfigResponse::TStatus::kPDiskNotFound;
}
Expand Down
9 changes: 9 additions & 0 deletions ydb/core/mind/bscontroller/impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
#include "self_heal.h"
#include "storage_pool_stat.h"

#include <util/generic/hash_multi_map.h>

inline IOutputStream& operator <<(IOutputStream& o, NKikimr::TErasureType::EErasureSpecies p) {
return o << NKikimr::TErasureType::ErasureSpeciesName(p);
}
Expand Down Expand Up @@ -1431,6 +1433,7 @@ class TBlobStorageController : public TActor<TBlobStorageController>, public TTa
class THostRecordMapImpl {
THashMap<THostId, THostRecord> HostIdToRecord;
THashMap<TNodeId, THostId> NodeIdToHostId;
THashMultiMap<TString, TNodeId> FqdnToNodeId;

public:
THostRecordMapImpl() = default;
Expand All @@ -1440,6 +1443,7 @@ class TBlobStorageController : public TActor<TBlobStorageController>, public TTa
const THostId hostId(nodeInfo.Host, nodeInfo.Port);
NodeIdToHostId.emplace(nodeInfo.NodeId, hostId);
HostIdToRecord.emplace(hostId, nodeInfo);
FqdnToNodeId.emplace(nodeInfo.Host, nodeInfo.NodeId);
}
}

Expand All @@ -1449,6 +1453,7 @@ class TBlobStorageController : public TActor<TBlobStorageController>, public TTa
const TNodeId nodeId = item.GetNodeId();
NodeIdToHostId.emplace(nodeId, hostId);
HostIdToRecord.emplace(hostId, item);
FqdnToNodeId.emplace(item.GetHost(), nodeId);
}
}

Expand Down Expand Up @@ -1481,6 +1486,10 @@ class TBlobStorageController : public TActor<TBlobStorageController>, public TTa
}
}

// Returns the result of FqdnToNodeId.equal_range(fqdn): a pair of iterators
// delimiting all entries stored under `fqdn`. The mapped value of each entry
// is a TNodeId; the range is empty (first == second) when the FQDN is unknown.
auto ResolveNodeId(const TString& fqdn) const {
return FqdnToNodeId.equal_range(fqdn);
}

auto begin() const {
return HostIdToRecord.begin();
}
Expand Down
24 changes: 18 additions & 6 deletions ydb/core/protos/blobstorage_config.proto
Original file line number Diff line number Diff line change
Expand Up @@ -400,19 +400,31 @@ message TWipeVDisk {
NKikimrBlobStorage.TVDiskID VDiskId = 2;
}

// Addresses a PDisk by the host it is attached to and its device path;
// used as the alternative to NKikimrBlobStorage.TPDiskId in PDisk commands.
message TPDiskLocation {
string Fqdn = 1; // fully qualified domain name of the host
string Path = 2; // absolute path to the device as enlisted in PDisk configuration
}

message TRestartPDisk {
NKikimrBlobStorage.TPDiskId TargetPDiskId = 1;
oneof PDisk {
NKikimrBlobStorage.TPDiskId TargetPDiskId = 1;
TPDiskLocation TargetPDiskLocation = 2;
}
}

// Switches read-only mode of a single PDisk on or off.
message TSetPDiskReadOnly {
    // The target PDisk, addressed either by id or by host FQDN + device path.
    oneof PDisk {
        NKikimrBlobStorage.TPDiskId TargetPDiskId = 1;
        // Tag 3, not 2: tag 2 already belongs to Value in the previous schema
        // and must not change its wire type.
        TPDiskLocation TargetPDiskLocation = 3;
    }
    // Desired read-only state. Keeps tag 2 so that requests from senders built
    // against the previous schema are still decoded correctly.
    bool Value = 2;
}

message TStopPDisk {
THostKey HostKey = 1; // host on which we are looking for the drive
string Path = 2; // absolute path to the device as enlisted in PDisk configuration
uint32 PDiskId = 3; // may be set instead of path to identify PDisk
oneof PDisk {
NKikimrBlobStorage.TPDiskId TargetPDiskId = 1;
TPDiskLocation TargetPDiskLocation = 2;
}
}

message TSetScrubPeriodicity {
Expand Down
Loading