Skip to content

Commit

Permalink
dstool + fix harakiri and slay
Browse files Browse the repository at this point in the history
  • Loading branch information
SammyVimes committed Jan 3, 2025
1 parent 871ccff commit ac7e4b7
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 3 deletions.
34 changes: 33 additions & 1 deletion ydb/apps/dstool/lib/dstool_cmd_cluster_workload_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ def add_options(p):
p.add_argument('--enable-kill-tablets', action='store_true', help='Enable tablet killer')
p.add_argument('--enable-kill-blob-depot', action='store_true', help='Enable BlobDepot killer')
p.add_argument('--enable-restart-pdisks', action='store_true', help='Enable PDisk restarter')
p.add_argument('--enable-readonly-pdisks', action='store_true', help='Enable SetPDiskReadOnly requests')
p.add_argument('--kill-signal', type=str, default='KILL', help='Kill signal to send to restart node')


Expand Down Expand Up @@ -69,6 +70,8 @@ def do(args):
pdisk_keys = {}
pdisk_key_versions = {}

pdisk_readonly = set()

config_retries = None

while True:
Expand Down Expand Up @@ -157,6 +160,24 @@ def do_restart_pdisk(node_id, pdisk_id):
raise Exception('failed to perform restart request: %s' % e)
if not response.Success:
raise Exception('Unexpected error from BSC: %s' % response.ErrorDescription)

def do_readonly_pdisk(node_id, pdisk_id, readonly):
    """Send a SetPDiskReadOnly command for one PDisk and track the result.

    node_id, pdisk_id -- identify the target PDisk (written to TargetPDiskId).
    readonly          -- value stored in the command's Value field.

    Raises Exception when the request cannot be invoked or when the response
    reports Success as false. On success the (node_id, pdisk_id) pair is added
    to pdisk_readonly if readonly is truthy, and discarded from it otherwise.
    """
    assert can_act_on_vslot(node_id, pdisk_id)

    # Build a config request carrying a single SetPDiskReadOnly command.
    bsc_request = common.kikimr_bsconfig.TConfigRequest(IgnoreDegradedGroupsChecks=True)
    set_readonly_cmd = bsc_request.Command.add().SetPDiskReadOnly
    set_readonly_cmd.TargetPDiskId.NodeId = node_id
    set_readonly_cmd.TargetPDiskId.PDiskId = pdisk_id
    set_readonly_cmd.Value = readonly

    try:
        bsc_response = common.invoke_bsc_request(bsc_request)
    except Exception as err:
        raise Exception('failed to perform SetPDiskReadOnly request: %s' % err)

    if not bsc_response.Success:
        raise Exception('Unexpected error from BSC: %s' % bsc_response.ErrorDescription)

    # Only a successful request changes the locally tracked read-only set.
    pdisk_key = (node_id, pdisk_id)
    update_tracking = pdisk_readonly.add if readonly else pdisk_readonly.discard
    update_tracking(pdisk_key)

def do_evict(vslot_id):
assert can_act_on_vslot(*vslot_id)
Expand Down Expand Up @@ -245,15 +266,22 @@ def do_kill_blob_depot():
readonlies = []
unreadonlies = []
pdisk_restarts = []
make_pdisks_readonly = []
make_pdisks_not_readonly = []

for vslot in base_config.VSlot:
if common.is_dynamic_group(vslot.GroupId):
vslot_id = common.get_vslot_id(vslot.VSlotId)
node_id, pdisk_id = vslot_id[:2]
vdisk_id = '[%08x:%d:%d:%d]' % (vslot.GroupId, vslot.FailRealmIdx, vslot.FailDomainIdx, vslot.VDiskIdx)
if vslot_id in vslot_readonly and not args.disable_readonly:
unreadonlies.append(('un-readonly vslot id: %s, vdisk id: %s' % (vslot_id, vdisk_id), (do_readonly, vslot, False)))
if can_act_on_vslot(*vslot_id[:2]) and args.enable_restart_pdisks:
pdisk_restarts.append(('restart pdisk node_id: %d, pdisk_id: %d' % vslot_id[:2], (do_restart_pdisk, *vslot_id[:2])))
pdisk_restarts.append(('restart pdisk node_id: %d, pdisk_id: %d' % (node_id, pdisk_id), (do_restart_pdisk, node_id, pdisk_id)))
if can_act_on_vslot(*vslot_id[:2]) and args.enable_readonly_pdisks:
make_pdisks_readonly.append(('readonly pdisk node_id: %d, pdisk_id: %d' % (node_id, pdisk_id), (do_readonly_pdisk, node_id, pdisk_id, True)))
if (node_id, pdisk_id) in pdisk_readonly and args.enable_readonly_pdisks:
make_pdisks_not_readonly.append(('un-readonly pdisk node_id: %d, pdisk_id: %d' % (node_id, pdisk_id), (do_readonly_pdisk, node_id, pdisk_id, False)))
if can_act_on_vslot(*vslot_id) and (recent_restarts or args.disable_restarts):
if not args.disable_evicts:
evicts.append(('evict vslot id: %s, vdisk id: %s' % (vslot_id, vdisk_id), (do_evict, vslot_id)))
Expand All @@ -277,6 +305,10 @@ def pick(v):
possible_actions.append(('un-readonly', (pick, unreadonlies)))
if pdisk_restarts:
possible_actions.append(('restart-pdisk', (pick, pdisk_restarts)))
if make_pdisks_readonly:
possible_actions.append(('make-pdisks-readonly', (pick, make_pdisks_readonly)))
if make_pdisks_not_readonly:
possible_actions.append(('make-pdisks-not-readonly', (pick, make_pdisks_not_readonly)))

restarts = []

Expand Down
11 changes: 9 additions & 2 deletions ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3770,8 +3770,6 @@ bool TPDisk::HandleReadOnlyIfWrite(TRequestBase *request) {
case ERequestType::RequestChunkReadPiece:
case ERequestType::RequestYardInit:
case ERequestType::RequestCheckSpace:
case ERequestType::RequestHarakiri:
case ERequestType::RequestYardSlay:
case ERequestType::RequestYardControl:
case ERequestType::RequestWhiteboartReport:
case ERequestType::RequestHttpInfo:
Expand Down Expand Up @@ -3806,6 +3804,15 @@ bool TPDisk::HandleReadOnlyIfWrite(TRequestBase *request) {
case ERequestType::RequestChunkForget:
ActorSystem->Send(sender, new NPDisk::TEvChunkForgetResult(NKikimrProto::CORRUPTED, 0, errorReason));
return true;
case ERequestType::RequestHarakiri:
ActorSystem->Send(sender, new NPDisk::TEvHarakiriResult(NKikimrProto::CORRUPTED, 0, errorReason));
return true;
case ERequestType::RequestYardSlay: {
TSlay &ev = *static_cast<TSlay*>(request);
ActorSystem->Send(sender, new NPDisk::TEvSlayResult(NKikimrProto::CORRUPTED, 0,
ev.VDiskId, ev.SlayOwnerRound, ev.PDiskId, ev.VSlotId, errorReason));
return true;
}

case ERequestType::RequestTryTrimChunk:
case ERequestType::RequestReleaseChunks:
Expand Down

0 comments on commit ac7e4b7

Please sign in to comment.