Skip to content

Commit

Permalink
Merge 95f69be into 56e8b11
Browse files Browse the repository at this point in the history
  • Loading branch information
SammyVimes authored Jan 6, 2025
2 parents 56e8b11 + 95f69be commit 5f86a9b
Show file tree
Hide file tree
Showing 34 changed files with 1,031 additions and 62 deletions.
61 changes: 59 additions & 2 deletions ydb/apps/dstool/lib/dstool_cmd_cluster_workload_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ def add_options(p):
p.add_argument('--enable-kill-tablets', action='store_true', help='Enable tablet killer')
p.add_argument('--enable-kill-blob-depot', action='store_true', help='Enable BlobDepot killer')
p.add_argument('--enable-restart-pdisks', action='store_true', help='Enable PDisk restarter')
p.add_argument('--enable-readonly-pdisks', action='store_true', help='Enable SetPDiskReadOnly requests')
p.add_argument('--kill-signal', type=str, default='KILL', help='Kill signal to send to restart node')


Expand Down Expand Up @@ -101,6 +102,12 @@ def do(args):
if vslot.ReadOnly
}

pdisk_readonly = {
(pdisk.NodeId, pdisk.PDiskId)
for pdisk in base_config.PDisk
if pdisk.ReadOnly
}

if (len(pdisk_keys) == 0):
# initialize pdisk_keys and pdisk_key_versions
for node_id in {pdisk.NodeId for pdisk in base_config.PDisk}:
Expand Down Expand Up @@ -137,6 +144,25 @@ def match(x):
return False
return True

def can_act_on_pdisk(node_id, pdisk_id):
def match(x):
return node_id == x[0] and pdisk_id == x[1]

for group in base_config.Group:
if any(map(match, map(common.get_vslot_id, group.VSlotId))):
if not common.is_dynamic_group(group.GroupId):
return False

content = {
common.get_vdisk_id_short(vslot): not match(vslot_id) and vslot.Ready and vdisk_status[vslot_id + common.get_vdisk_id(vslot)]
for vslot_id in map(common.get_vslot_id, group.VSlotId)
for vslot in [vslot_map[vslot_id]]
}
common.print_if_verbose(args, content, file=sys.stderr)
if not grouptool.check_fail_model(content, group.ErasureSpecies):
return False
return True

def do_restart(node_id):
host = node_fqdn_map[node_id]
if args.enable_pdisk_encryption_keys_changes:
Expand All @@ -158,6 +184,20 @@ def do_restart_pdisk(node_id, pdisk_id):
if not response.Success:
raise Exception('Unexpected error from BSC: %s' % response.ErrorDescription)

def do_readonly_pdisk(node_id, pdisk_id, readonly):
assert can_act_on_vslot(node_id, pdisk_id)
request = common.kikimr_bsconfig.TConfigRequest(IgnoreDegradedGroupsChecks=True)
cmd = request.Command.add().SetPDiskReadOnly
cmd.TargetPDiskId.NodeId = node_id
cmd.TargetPDiskId.PDiskId = pdisk_id
cmd.Value = readonly
try:
response = common.invoke_bsc_request(request)
except Exception as e:
raise Exception('failed to perform SetPDiskReadOnly request: %s' % e)
if not response.Success:
raise Exception('Unexpected error from BSC: %s' % response.ErrorDescription)

def do_evict(vslot_id):
assert can_act_on_vslot(*vslot_id)
try:
Expand Down Expand Up @@ -245,15 +285,16 @@ def do_kill_blob_depot():
readonlies = []
unreadonlies = []
pdisk_restarts = []
make_pdisks_readonly = []
make_pdisks_not_readonly = []

for vslot in base_config.VSlot:
if common.is_dynamic_group(vslot.GroupId):
vslot_id = common.get_vslot_id(vslot.VSlotId)
node_id, pdisk_id = vslot_id[:2]
vdisk_id = '[%08x:%d:%d:%d]' % (vslot.GroupId, vslot.FailRealmIdx, vslot.FailDomainIdx, vslot.VDiskIdx)
if vslot_id in vslot_readonly and not args.disable_readonly:
unreadonlies.append(('un-readonly vslot id: %s, vdisk id: %s' % (vslot_id, vdisk_id), (do_readonly, vslot, False)))
if can_act_on_vslot(*vslot_id[:2]) and args.enable_restart_pdisks:
pdisk_restarts.append(('restart pdisk node_id: %d, pdisk_id: %d' % vslot_id[:2], (do_restart_pdisk, *vslot_id[:2])))
if can_act_on_vslot(*vslot_id) and (recent_restarts or args.disable_restarts):
if not args.disable_evicts:
evicts.append(('evict vslot id: %s, vdisk id: %s' % (vslot_id, vdisk_id), (do_evict, vslot_id)))
Expand All @@ -262,6 +303,18 @@ def do_kill_blob_depot():
if not args.disable_readonly:
readonlies.append(('readonly vslot id: %s, vdisk id: %s' % (vslot_id, vdisk_id), (do_readonly, vslot, True)))

for pdisk in base_config.PDisk:
node_id, pdisk_id = pdisk.NodeId, pdisk.PDiskId

if can_act_on_pdisk(node_id, pdisk_id):
if args.enable_restart_pdisks:
pdisk_restarts.append(('restart pdisk node_id: %d, pdisk_id: %d' % (node_id, pdisk_id), (do_restart_pdisk, node_id, pdisk_id)))
if args.enable_readonly_pdisks:
make_pdisks_readonly.append(('readonly pdisk node_id: %d, pdisk_id: %d' % (node_id, pdisk_id), (do_readonly_pdisk, node_id, pdisk_id, True)))

if (node_id, pdisk_id) in pdisk_readonly and args.enable_readonly_pdisks:
make_pdisks_not_readonly.append(('un-readonly pdisk node_id: %d, pdisk_id: %d' % (node_id, pdisk_id), (do_readonly_pdisk, node_id, pdisk_id, False)))

def pick(v):
action_name, action = random.choice(v)
print(action_name)
Expand All @@ -277,6 +330,10 @@ def pick(v):
possible_actions.append(('un-readonly', (pick, unreadonlies)))
if pdisk_restarts:
possible_actions.append(('restart-pdisk', (pick, pdisk_restarts)))
if make_pdisks_readonly:
possible_actions.append(('make-pdisks-readonly', (pick, make_pdisks_readonly)))
if make_pdisks_not_readonly:
possible_actions.append(('make-pdisks-not-readonly', (pick, make_pdisks_not_readonly)))

restarts = []

Expand Down
3 changes: 2 additions & 1 deletion ydb/core/blobstorage/nodewarden/node_warden_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -420,7 +420,8 @@ namespace NKikimr::NStorage {
std::map<TVSlotId, TVDiskRecord> LocalVDisks;
THashMap<TActorId, TVSlotId> VDiskIdByActor;
std::map<TVSlotId, ui64> SlayInFlight;
std::set<ui32> PDiskRestartInFlight;
// PDiskId -> is another restart required after the current restart.
std::unordered_map<ui32, bool> PDiskRestartInFlight;
TIntrusiveList<TVDiskRecord, TUnreportedMetricTag> VDisksWithUnreportedMetrics;

void DestroyLocalVDisk(TVDiskRecord& vdisk);
Expand Down
9 changes: 9 additions & 0 deletions ydb/core/blobstorage/nodewarden/node_warden_mon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,15 @@ void TNodeWarden::RenderWholePage(IOutputStream& out) {
}
}
}
if (!PDiskRestartInFlight.empty()) {
DIV() {
out << "PDiskRestartInFlight# [";
for (const auto& item : PDiskRestartInFlight) {
out << "pdiskId:" << item.first << " -> needsAnotherRestart: " << item.second << ", ";
}
out << "]";
}
}

TAG(TH3) { out << "VDisks"; }
TABLE_CLASS("table oddgray") {
Expand Down
39 changes: 33 additions & 6 deletions ydb/core/blobstorage/nodewarden/node_warden_pdisk.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ namespace NKikimr::NStorage {
pdiskConfig->ExpectedSerial = pdisk.GetExpectedSerial();
}

if (pdisk.HasReadOnly()) {
pdiskConfig->ReadOnly = pdisk.GetReadOnly();
}

// Path scheme: "SectorMap:unique_name[:3000]"
// where '3000' is device size of in GiB.
if (path.Contains(":")) {
Expand Down Expand Up @@ -233,13 +237,25 @@ namespace NKikimr::NStorage {
}

void TNodeWarden::OnPDiskRestartFinished(ui32 pdiskId, NKikimrProto::EReplyStatus status) {
if (PDiskRestartInFlight.erase(pdiskId) == 0) {
auto it = PDiskRestartInFlight.find(pdiskId);
if (it == PDiskRestartInFlight.end()) {
// There was no restart in progress.
return;
}

bool requiresAnotherRestart = it->second;

PDiskRestartInFlight.erase(it);

const TPDiskKey pdiskKey(LocalNodeId, pdiskId);

if (requiresAnotherRestart) {
auto it = LocalPDisks.find(pdiskKey);
auto pdisk = it->second.Record;
DoRestartLocalPDisk(pdisk);
return;
}

const TVSlotId from(pdiskKey.NodeId, pdiskKey.PDiskId, 0);
const TVSlotId to(pdiskKey.NodeId, pdiskKey.PDiskId, Max<ui32>());

Expand Down Expand Up @@ -282,11 +298,12 @@ namespace NKikimr::NStorage {

STLOG(PRI_NOTICE, BS_NODE, NW75, "DoRestartLocalPDisk", (PDiskId, pdiskId));

const auto [_, inserted] = PDiskRestartInFlight.emplace(pdiskId);
const auto [restartIt, inserted] = PDiskRestartInFlight.try_emplace(pdiskId, false);

if (!inserted) {
STLOG(PRI_NOTICE, BS_NODE, NW76, "Restart already in progress", (PDiskId, pdiskId));
// Restart is already in progress.
// Restart is already in progress, but we will need to make a new restart, as the configuration changed.
restartIt->second = true;
return;
}

Expand Down Expand Up @@ -333,16 +350,26 @@ namespace NKikimr::NStorage {
continue;
}

const NKikimrBlobStorage::EEntityStatus entityStatus = pdisk.HasEntityStatus()
NKikimrBlobStorage::EEntityStatus entityStatus = pdisk.HasEntityStatus()
? pdisk.GetEntityStatus()
: NKikimrBlobStorage::INITIAL;

const TPDiskKey key(pdisk);

auto localPdiskIt = LocalPDisks.find({pdisk.GetNodeID(), pdisk.GetPDiskID()});
if (localPdiskIt != LocalPDisks.end()) {
auto& record = localPdiskIt->second;

if (record.Record.GetReadOnly() != pdisk.GetReadOnly()) {
// Changing read-only flag requires restart.
entityStatus = NKikimrBlobStorage::RESTART;
}
}

switch (entityStatus) {
case NKikimrBlobStorage::RESTART:
if (auto it = LocalPDisks.find({pdisk.GetNodeID(), pdisk.GetPDiskID()}); it != LocalPDisks.end()) {
it->second.Record = pdisk;
if (localPdiskIt != LocalPDisks.end()) {
localPdiskIt->second.Record = pdisk;
}
DoRestartLocalPDisk(pdisk);
[[fallthrough]];
Expand Down
3 changes: 3 additions & 0 deletions ydb/core/blobstorage/pdisk/blobstorage_pdisk.h
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,9 @@ struct TEvReadLogResult : public TEventLocal<TEvReadLogResult, TEvBlobStorage::E
TLogPosition Position;
TLogPosition NextPosition;
bool IsEndOfLog;
ui32 LastGoodChunkIdx = 0;
ui64 LastGoodSectorIdx = 0;

TStatusFlags StatusFlags;
TString ErrorReason;
TOwner Owner;
Expand Down
17 changes: 16 additions & 1 deletion ydb/core/blobstorage/pdisk/blobstorage_pdisk_actor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -376,13 +376,21 @@ class TPDiskActor : public TActorBootstrapped<TPDiskActor> {
TActorId pDiskActor = std::get<2>(*params);
delete params;

TPDiskConfig *cfg = actor->Cfg.Get();

if (cfg->ReadOnly) {
TString readOnlyError = "PDisk is in read-only mode";
LOG_ERROR_S(*actorSystem, NKikimrServices::BS_PDISK, "Formatting error, " << readOnlyError);
actorSystem->Send(pDiskActor, new TEvPDiskFormattingFinished(false, readOnlyError));
return nullptr;
}

NPDisk::TKey chunkKey;
NPDisk::TKey logKey;
NPDisk::TKey sysLogKey;
EntropyPool().Read(&chunkKey, sizeof(NKikimr::NPDisk::TKey));
EntropyPool().Read(&logKey, sizeof(NKikimr::NPDisk::TKey));
EntropyPool().Read(&sysLogKey, sizeof(NKikimr::NPDisk::TKey));
TPDiskConfig *cfg = actor->Cfg.Get();

try {
try {
Expand Down Expand Up @@ -448,6 +456,13 @@ class TPDiskActor : public TActorBootstrapped<TPDiskActor> {
NActors::TActorSystem* actorSystem = std::get<3>(*params);
TActorId pdiskActor = std::get<4>(*params);

if (cfg->ReadOnly) {
TString readOnlyError = "PDisk is in read-only mode";
LOG_ERROR_S(*actorSystem, NKikimrServices::BS_PDISK, "Formatting error, " << readOnlyError);
actorSystem->Send(pdiskActor, new TEvPDiskFormattingFinished(false, readOnlyError));
return nullptr;
}

THolder<NPDisk::TPDisk> pDisk(new NPDisk::TPDisk(cfg, counters));

pDisk->Initialize(actorSystem, TActorId());
Expand Down
4 changes: 2 additions & 2 deletions ydb/core/blobstorage/pdisk/blobstorage_pdisk_blockdevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,9 @@ class TPDisk;

IBlockDevice* CreateRealBlockDevice(const TString &path, ui32 pDiskId, TPDiskMon &mon,
ui64 reorderingCycles, ui64 seekCostNs, ui64 deviceInFlight, TDeviceMode::TFlags flags,
ui32 maxQueuedCompletionActions, TIntrusivePtr<TSectorMap> sectorMap, TPDisk * const pdisk = nullptr);
ui32 maxQueuedCompletionActions, TIntrusivePtr<TSectorMap> sectorMap, TPDisk * const pdisk = nullptr, bool readOnly = false);
IBlockDevice* CreateRealBlockDeviceWithDefaults(const TString &path, TPDiskMon &mon, TDeviceMode::TFlags flags,
TIntrusivePtr<TSectorMap> sectorMap, TActorSystem *actorSystem, TPDisk * const pdisk = nullptr);
TIntrusivePtr<TSectorMap> sectorMap, TActorSystem *actorSystem, TPDisk * const pdisk = nullptr, bool readOnly = false);

} // NPDisk
} // NKikimr
19 changes: 12 additions & 7 deletions ydb/core/blobstorage/pdisk/blobstorage_pdisk_blockdevice_async.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -773,6 +773,7 @@ class TRealBlockDevice : public IBlockDevice {
TFlightControl FlightControl;
TAtomicBlockCounter QuitCounter;
TString LastWarning;
bool ReadOnly;
TDeque<IAsyncIoOperation*> Trash;
TMutex TrashMutex;

Expand All @@ -781,7 +782,7 @@ class TRealBlockDevice : public IBlockDevice {
public:
TRealBlockDevice(const TString &path, ui32 pDiskId, TPDiskMon &mon, ui64 reorderingCycles,
ui64 seekCostNs, ui64 deviceInFlight, TDeviceMode::TFlags flags, ui32 maxQueuedCompletionActions,
TIntrusivePtr<TSectorMap> sectorMap)
TIntrusivePtr<TSectorMap> sectorMap, bool readOnly)
: Mon(mon)
, ActorSystem(nullptr)
, Path(path)
Expand All @@ -803,6 +804,7 @@ class TRealBlockDevice : public IBlockDevice {
, DeviceInFlight(FastClp2(deviceInFlight))
, FlightControl(CountTrailingZeroBits(DeviceInFlight))
, LastWarning(IsPowerOf2(deviceInFlight) ? "" : "Device inflight must be a power of 2")
, ReadOnly(readOnly)
{
if (sectorMap) {
DriveData = TDriveData();
Expand Down Expand Up @@ -980,6 +982,7 @@ class TRealBlockDevice : public IBlockDevice {
}

void TrimSync(ui32 size, ui64 offset) override {
Y_ABORT_UNLESS(!ReadOnly);
IAsyncIoOperation* op = IoContext->CreateAsyncIoOperation(nullptr, {}, nullptr);
IoContext->PreparePTrim(op, size, offset);
IsTrimEnabled = IoContext->DoTrim(op);
Expand All @@ -1006,6 +1009,7 @@ class TRealBlockDevice : public IBlockDevice {
void PwriteAsync(const void *data, ui64 size, ui64 offset, TCompletionAction *completionAction, TReqId reqId,
NWilson::TTraceId *traceId) override {
Y_ABORT_UNLESS(completionAction);
Y_ABORT_UNLESS(!ReadOnly);
if (!IsInitialized) {
completionAction->Release(ActorSystem);
return;
Expand All @@ -1022,6 +1026,7 @@ class TRealBlockDevice : public IBlockDevice {

void FlushAsync(TCompletionAction *completionAction, TReqId reqId) override {
Y_ABORT_UNLESS(completionAction);
Y_ABORT_UNLESS(!ReadOnly);
if (!IsInitialized) {
completionAction->Release(ActorSystem);
return;
Expand Down Expand Up @@ -1301,9 +1306,9 @@ class TCachedBlockDevice : public TRealBlockDevice {
public:
TCachedBlockDevice(const TString &path, ui32 pDiskId, TPDiskMon &mon, ui64 reorderingCycles,
ui64 seekCostNs, ui64 deviceInFlight, TDeviceMode::TFlags flags, ui32 maxQueuedCompletionActions,
TIntrusivePtr<TSectorMap> sectorMap, TPDisk * const pdisk)
TIntrusivePtr<TSectorMap> sectorMap, TPDisk * const pdisk, bool readOnly)
: TRealBlockDevice(path, pDiskId, mon, reorderingCycles, seekCostNs, deviceInFlight, flags,
maxQueuedCompletionActions, sectorMap)
maxQueuedCompletionActions, sectorMap, readOnly)
, ReadsInFly(0), PDisk(pdisk)
{}

Expand Down Expand Up @@ -1441,14 +1446,14 @@ class TCachedBlockDevice : public TRealBlockDevice {

IBlockDevice* CreateRealBlockDevice(const TString &path, ui32 pDiskId, TPDiskMon &mon, ui64 reorderingCycles,
ui64 seekCostNs, ui64 deviceInFlight, TDeviceMode::TFlags flags, ui32 maxQueuedCompletionActions,
TIntrusivePtr<TSectorMap> sectorMap, TPDisk * const pdisk) {
TIntrusivePtr<TSectorMap> sectorMap, TPDisk * const pdisk, bool readOnly) {
return new TCachedBlockDevice(path, pDiskId, mon, reorderingCycles, seekCostNs, deviceInFlight, flags,
maxQueuedCompletionActions, sectorMap, pdisk);
maxQueuedCompletionActions, sectorMap, pdisk, readOnly);
}

IBlockDevice* CreateRealBlockDeviceWithDefaults(const TString &path, TPDiskMon &mon, TDeviceMode::TFlags flags,
TIntrusivePtr<TSectorMap> sectorMap, TActorSystem *actorSystem, TPDisk * const pdisk) {
IBlockDevice *device = CreateRealBlockDevice(path, 0, mon, 0, 0, 4, flags, 8, sectorMap, pdisk);
TIntrusivePtr<TSectorMap> sectorMap, TActorSystem *actorSystem, TPDisk * const pdisk, bool readOnly) {
IBlockDevice *device = CreateRealBlockDevice(path, 0, mon, 0, 0, 4, flags, 8, sectorMap, pdisk, readOnly);
device->Initialize(actorSystem, {});
return device;
}
Expand Down
2 changes: 2 additions & 0 deletions ydb/core/blobstorage/pdisk/blobstorage_pdisk_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,8 @@ struct TPDiskConfig : public TThrRefBase {

NKikimrBlobStorage::TPDiskSpaceColor::E SpaceColorBorder = NKikimrBlobStorage::TPDiskSpaceColor::GREEN;

bool ReadOnly = false;

TPDiskConfig(ui64 pDiskGuid, ui32 pdiskId, ui64 pDiskCategory)
: TPDiskConfig({}, pDiskGuid, pdiskId, pDiskCategory)
{}
Expand Down
Loading

0 comments on commit 5f86a9b

Please sign in to comment.