From 3e43a7146e86bf1d01290e1ff6a582c5844f592c Mon Sep 17 00:00:00 2001 From: Semyon Danilov Date: Mon, 16 Dec 2024 17:28:28 +0400 Subject: [PATCH] ReadOnly pdisk (cherry-picked from 9d8fcc7255e9a013ba6d2cae5cc7d30622842100) --- .../blobstorage/nodewarden/node_warden_impl.h | 3 +- .../nodewarden/node_warden_mon.cpp | 9 + .../nodewarden/node_warden_pdisk.cpp | 36 +- .../blobstorage/pdisk/blobstorage_pdisk.h | 3 + .../pdisk/blobstorage_pdisk_actor.cpp | 25 +- .../pdisk/blobstorage_pdisk_blockdevice.h | 4 +- .../blobstorage_pdisk_blockdevice_async.cpp | 19 +- .../pdisk/blobstorage_pdisk_config.h | 2 + .../pdisk/blobstorage_pdisk_impl.cpp | 93 ++++- .../pdisk/blobstorage_pdisk_impl.h | 7 +- .../pdisk/blobstorage_pdisk_impl_http.cpp | 14 +- .../pdisk/blobstorage_pdisk_impl_log.cpp | 59 ++- .../pdisk/blobstorage_pdisk_logreader.cpp | 4 + .../pdisk/blobstorage_pdisk_ut.cpp | 77 ++++ .../pdisk/blobstorage_pdisk_ut_env.h | 7 +- .../blobstorage/pdisk/mock/pdisk_mock.cpp | 14 +- ydb/core/blobstorage/pdisk/mock/pdisk_mock.h | 4 +- ydb/core/blobstorage/ut_blobstorage/lib/env.h | 2 +- .../ut_blobstorage/read_only_pdisk.cpp | 368 ++++++++++++++++++ .../ut_blobstorage/ut_read_only_pdisk/ya.make | 15 + ydb/core/blobstorage/ut_blobstorage/ya.make | 1 + ydb/core/blobstorage/ut_vdisk/lib/prepare.cpp | 3 +- ydb/core/blobstorage/ut_vdisk/lib/prepare.h | 1 + ydb/core/blobstorage/ut_vdisk/vdisk_test.cpp | 22 ++ ydb/core/mind/bscontroller/bsc.cpp | 3 +- ydb/core/mind/bscontroller/cmds_box.cpp | 29 ++ ydb/core/mind/bscontroller/config.cpp | 3 + ydb/core/mind/bscontroller/config.h | 1 + ydb/core/mind/bscontroller/config_cmd.cpp | 1 + ydb/core/mind/bscontroller/mood.h | 5 +- ydb/core/mind/bscontroller/register_node.cpp | 3 + ydb/core/protos/blobstorage.proto | 1 + ydb/core/protos/blobstorage_config.proto | 6 + 33 files changed, 782 insertions(+), 62 deletions(-) create mode 100644 ydb/core/blobstorage/ut_blobstorage/read_only_pdisk.cpp create mode 100644 ydb/core/blobstorage/ut_blobstorage/ut_read_only_pdisk/ya.make diff --git a/ydb/core/blobstorage/nodewarden/node_warden_impl.h b/ydb/core/blobstorage/nodewarden/node_warden_impl.h index 60db21bac6ad..3d9c5df620a7 100644 --- a/ydb/core/blobstorage/nodewarden/node_warden_impl.h +++ b/ydb/core/blobstorage/nodewarden/node_warden_impl.h @@ -420,7 +420,8 @@ namespace NKikimr::NStorage { std::map LocalVDisks; THashMap VDiskIdByActor; std::map SlayInFlight; - std::set PDiskRestartInFlight; + // PDiskId -> is another restart required after the current restart. + std::unordered_map PDiskRestartInFlight; TIntrusiveList VDisksWithUnreportedMetrics; void DestroyLocalVDisk(TVDiskRecord& vdisk); diff --git a/ydb/core/blobstorage/nodewarden/node_warden_mon.cpp b/ydb/core/blobstorage/nodewarden/node_warden_mon.cpp index 58ddd7dbdd08..1f94984e6851 100644 --- a/ydb/core/blobstorage/nodewarden/node_warden_mon.cpp +++ b/ydb/core/blobstorage/nodewarden/node_warden_mon.cpp @@ -147,6 +147,15 @@ void TNodeWarden::RenderWholePage(IOutputStream& out) { } } } + if (!PDiskRestartInFlight.empty()) { + DIV() { + out << "PDiskRestartInFlight# ["; + for (const auto& item : PDiskRestartInFlight) { + out << "pdiskId:" << item.first << " -> needsAnotherRestart: " << item.second << ", "; + } + out << "]"; + } + } TAG(TH3) { out << "VDisks"; } TABLE_CLASS("table oddgray") { diff --git a/ydb/core/blobstorage/nodewarden/node_warden_pdisk.cpp b/ydb/core/blobstorage/nodewarden/node_warden_pdisk.cpp index bd8df95a2139..d139452197f3 100644 --- a/ydb/core/blobstorage/nodewarden/node_warden_pdisk.cpp +++ b/ydb/core/blobstorage/nodewarden/node_warden_pdisk.cpp @@ -39,6 +39,10 @@ namespace NKikimr::NStorage { pdiskConfig->ExpectedSerial = pdisk.GetExpectedSerial(); } + if (pdisk.HasReadOnly()) { + pdiskConfig->ReadOnly = pdisk.GetReadOnly(); + } + // Path scheme: "SectorMap:unique_name[:3000]" // where '3000' is device size of in GiB. if (path.Contains(":")) { @@ -224,13 +228,25 @@ namespace NKikimr::NStorage { } void TNodeWarden::OnPDiskRestartFinished(ui32 pdiskId, NKikimrProto::EReplyStatus status) { - if (PDiskRestartInFlight.erase(pdiskId) == 0) { + auto it = PDiskRestartInFlight.find(pdiskId); + if (it == PDiskRestartInFlight.end()) { // There was no restart in progress. return; } + bool requiresAnotherRestart = it->second; + + PDiskRestartInFlight.erase(it); + const TPDiskKey pdiskKey(LocalNodeId, pdiskId); + if (requiresAnotherRestart) { + auto it = LocalPDisks.find(pdiskKey); + auto pdisk = it->second.Record; + DoRestartLocalPDisk(pdisk); + return; + } + const TVSlotId from(pdiskKey.NodeId, pdiskKey.PDiskId, 0); const TVSlotId to(pdiskKey.NodeId, pdiskKey.PDiskId, Max()); @@ -273,11 +289,12 @@ namespace NKikimr::NStorage { STLOG(PRI_NOTICE, BS_NODE, NW75, "DoRestartLocalPDisk", (PDiskId, pdiskId)); - const auto [_, inserted] = PDiskRestartInFlight.emplace(pdiskId); + const auto [restartIt, inserted] = PDiskRestartInFlight.try_emplace(pdiskId, false); if (!inserted) { STLOG(PRI_NOTICE, BS_NODE, NW76, "Restart already in progress", (PDiskId, pdiskId)); - // Restart is already in progress. + // Restart is already in progress, but we will need to make a new restart, as the configuration changed. + restartIt->second = true; return; } @@ -324,12 +341,23 @@ namespace NKikimr::NStorage { continue; } - const NKikimrBlobStorage::EEntityStatus entityStatus = pdisk.HasEntityStatus() + NKikimrBlobStorage::EEntityStatus entityStatus = pdisk.HasEntityStatus() ? pdisk.GetEntityStatus() : NKikimrBlobStorage::INITIAL; const TPDiskKey key(pdisk); + if (pdisk.HasReadOnly()) { + if (auto it = LocalPDisks.find({pdisk.GetNodeID(), pdisk.GetPDiskID()}); it != LocalPDisks.end()) { + auto& record = it->second; + + if (!record.Record.HasReadOnly() || record.Record.GetReadOnly() != pdisk.GetReadOnly()) { + // Changing read-only flag requires restart. + entityStatus = NKikimrBlobStorage::RESTART; + } + } + } + switch (entityStatus) { case NKikimrBlobStorage::RESTART: if (auto it = LocalPDisks.find({pdisk.GetNodeID(), pdisk.GetPDiskID()}); it != LocalPDisks.end()) { diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk.h index 7cdc40c002c8..1aff577334d6 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk.h @@ -451,6 +451,9 @@ struct TEvReadLogResult : public TEventLocal { TActorId pDiskActor = std::get<2>(*params); delete params; + TPDiskConfig *cfg = actor->Cfg.Get(); + + if (cfg->ReadOnly) { + TString readOnlyError = "PDisk is in read-only mode"; + LOG_ERROR_S(*actorSystem, NKikimrServices::BS_PDISK, "Formatting error, " << readOnlyError); + actorSystem->Send(pDiskActor, new TEvPDiskFormattingFinished(false, readOnlyError)); + return nullptr; + } + NPDisk::TKey chunkKey; NPDisk::TKey logKey; NPDisk::TKey sysLogKey; EntropyPool().Read(&chunkKey, sizeof(NKikimr::NPDisk::TKey)); EntropyPool().Read(&logKey, sizeof(NKikimr::NPDisk::TKey)); EntropyPool().Read(&sysLogKey, sizeof(NKikimr::NPDisk::TKey)); - TPDiskConfig *cfg = actor->Cfg.Get(); try { try { @@ -448,6 +456,13 @@ class TPDiskActor : public TActorBootstrapped { NActors::TActorSystem* actorSystem = std::get<3>(*params); TActorId pdiskActor = std::get<4>(*params); + if (cfg->ReadOnly) { + TString readOnlyError = "PDisk is in read-only mode"; + LOG_ERROR_S(*actorSystem, NKikimrServices::BS_PDISK, "Formatting error, " << readOnlyError); + actorSystem->Send(pdiskActor, new TEvPDiskFormattingFinished(false, readOnlyError)); + return nullptr; + } + THolder pDisk(new NPDisk::TPDisk(cfg, counters)); pDisk->Initialize(actorSystem, TActorId()); @@ -1004,7 +1019,7 @@ class TPDiskActor : public TActorBootstrapped { } Send(ev->Sender, new TEvBlobStorage::TEvNotifyWardenPDiskRestarted(PDisk->PDiskId, NKikimrProto::EReplyStatus::NOTREADY)); - + return; } @@ -1017,7 +1032,7 @@ class TPDiskActor : public TActorBootstrapped { NPDisk::TMainKey newMainKey = ev->Get()->MainKey; SecureWipeBuffer((ui8*)ev->Get()->MainKey.Keys.data(), sizeof(NPDisk::TKey) * ev->Get()->MainKey.Keys.size()); - + LOG_NOTICE_S(*TlsActivationContext, NKikimrServices::BS_PDISK, "PDiskId# " << PDisk->PDiskId << " Going to restart PDisk since received TEvAskWardenRestartPDiskResult"); @@ -1031,7 +1046,7 @@ class TPDiskActor : public TActorBootstrapped { TIntrusivePtr actorCfg = std::move(Cfg); auto& newCfg = ev->Get()->Config; - + if (newCfg) { Y_VERIFY_S(newCfg->PDiskId == pdiskId, "New config's PDiskId# " << newCfg->PDiskId << " is not equal to real PDiskId# " << pdiskId); @@ -1046,7 +1061,7 @@ class TPDiskActor : public TActorBootstrapped { TGenericExecutorThread& executorThread = actorCtx.ExecutorThread; PassAway(); - + CreatePDiskActor(executorThread, counters, actorCfg, newMainKey, pdiskId, poolId, nodeId); Send(ev->Sender, new TEvBlobStorage::TEvNotifyWardenPDiskRestarted(pdiskId)); diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_blockdevice.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_blockdevice.h index 1388ec92fa4d..145dc0c53aa2 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_blockdevice.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_blockdevice.h @@ -66,9 +66,9 @@ class TPDisk; IBlockDevice* CreateRealBlockDevice(const TString &path, ui32 pDiskId, TPDiskMon &mon, ui64 reorderingCycles, ui64 seekCostNs, ui64 deviceInFlight, TDeviceMode::TFlags flags, - ui32 maxQueuedCompletionActions, TIntrusivePtr sectorMap, TPDisk * const pdisk = nullptr); + ui32 maxQueuedCompletionActions, TIntrusivePtr sectorMap, TPDisk * const pdisk = nullptr, bool readOnly = false); IBlockDevice* CreateRealBlockDeviceWithDefaults(const TString &path, TPDiskMon &mon, TDeviceMode::TFlags flags, - TIntrusivePtr sectorMap, TActorSystem *actorSystem, TPDisk * const pdisk = nullptr); + TIntrusivePtr sectorMap, TActorSystem *actorSystem, TPDisk * const pdisk = nullptr, bool readOnly = false); } // NPDisk } // NKikimr diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_blockdevice_async.cpp b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_blockdevice_async.cpp index 7d3f6ab8cc70..7a9cb949d5fe 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_blockdevice_async.cpp +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_blockdevice_async.cpp @@ -773,6 +773,7 @@ class TRealBlockDevice : public IBlockDevice { TFlightControl FlightControl; TAtomicBlockCounter QuitCounter; TString LastWarning; + bool ReadOnly; TDeque Trash; TMutex TrashMutex; @@ -781,7 +782,7 @@ class TRealBlockDevice : public IBlockDevice { public: TRealBlockDevice(const TString &path, ui32 pDiskId, TPDiskMon &mon, ui64 reorderingCycles, ui64 seekCostNs, ui64 deviceInFlight, TDeviceMode::TFlags flags, ui32 maxQueuedCompletionActions, - TIntrusivePtr sectorMap) + TIntrusivePtr sectorMap, bool readOnly) : Mon(mon) , ActorSystem(nullptr) , Path(path) @@ -803,6 +804,7 @@ class TRealBlockDevice : public IBlockDevice { , DeviceInFlight(FastClp2(deviceInFlight)) , FlightControl(CountTrailingZeroBits(DeviceInFlight)) , LastWarning(IsPowerOf2(deviceInFlight) ? "" : "Device inflight must be a power of 2") + , ReadOnly(readOnly) { if (sectorMap) { DriveData = TDriveData(); @@ -980,6 +982,7 @@ class TRealBlockDevice : public IBlockDevice { } void TrimSync(ui32 size, ui64 offset) override { + Y_ABORT_UNLESS(!ReadOnly); IAsyncIoOperation* op = IoContext->CreateAsyncIoOperation(nullptr, {}, nullptr); IoContext->PreparePTrim(op, size, offset); IsTrimEnabled = IoContext->DoTrim(op); @@ -1006,6 +1009,7 @@ class TRealBlockDevice : public IBlockDevice { void PwriteAsync(const void *data, ui64 size, ui64 offset, TCompletionAction *completionAction, TReqId reqId, NWilson::TTraceId *traceId) override { Y_ABORT_UNLESS(completionAction); + Y_ABORT_UNLESS(!ReadOnly); if (!IsInitialized) { completionAction->Release(ActorSystem); return; @@ -1022,6 +1026,7 @@ class TRealBlockDevice : public IBlockDevice { void FlushAsync(TCompletionAction *completionAction, TReqId reqId) override { Y_ABORT_UNLESS(completionAction); + Y_ABORT_UNLESS(!ReadOnly); if (!IsInitialized) { completionAction->Release(ActorSystem); return; @@ -1301,9 +1306,9 @@ class TCachedBlockDevice : public TRealBlockDevice { public: TCachedBlockDevice(const TString &path, ui32 pDiskId, TPDiskMon &mon, ui64 reorderingCycles, ui64 seekCostNs, ui64 deviceInFlight, TDeviceMode::TFlags flags, ui32 maxQueuedCompletionActions, - TIntrusivePtr sectorMap, TPDisk * const pdisk) + TIntrusivePtr sectorMap, TPDisk * const pdisk, bool readOnly) : TRealBlockDevice(path, pDiskId, mon, reorderingCycles, seekCostNs, deviceInFlight, flags, - maxQueuedCompletionActions, sectorMap) + maxQueuedCompletionActions, sectorMap, readOnly) , ReadsInFly(0), PDisk(pdisk) {} @@ -1441,14 +1446,14 @@ class TCachedBlockDevice : public TRealBlockDevice { IBlockDevice* CreateRealBlockDevice(const TString &path, ui32 pDiskId, TPDiskMon &mon, ui64 reorderingCycles, ui64 seekCostNs, ui64 deviceInFlight, TDeviceMode::TFlags flags, ui32 maxQueuedCompletionActions, - TIntrusivePtr sectorMap, TPDisk * const pdisk) { + TIntrusivePtr sectorMap, TPDisk * const pdisk, bool readOnly) { return new TCachedBlockDevice(path, pDiskId, mon, reorderingCycles, seekCostNs, deviceInFlight, flags, - maxQueuedCompletionActions, sectorMap, pdisk); + maxQueuedCompletionActions, sectorMap, pdisk, readOnly); } IBlockDevice* CreateRealBlockDeviceWithDefaults(const TString &path, TPDiskMon &mon, TDeviceMode::TFlags flags, - TIntrusivePtr sectorMap, TActorSystem *actorSystem, TPDisk * const pdisk) { - IBlockDevice *device = CreateRealBlockDevice(path, 0, mon, 0, 0, 4, flags, 8, sectorMap, pdisk); + TIntrusivePtr sectorMap, TActorSystem *actorSystem, TPDisk * const pdisk, bool readOnly) { + IBlockDevice *device = CreateRealBlockDevice(path, 0, mon, 0, 0, 4, flags, 8, sectorMap, pdisk, readOnly); device->Initialize(actorSystem, {}); return device; } diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_config.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_config.h index 499affa04043..94d9345e560e 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_config.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_config.h @@ -155,6 +155,8 @@ struct TPDiskConfig : public TThrRefBase { NKikimrBlobStorage::TPDiskSpaceColor::E SpaceColorBorder = NKikimrBlobStorage::TPDiskSpaceColor::GREEN; + bool ReadOnly = false; + TPDiskConfig(ui64 pDiskGuid, ui32 pdiskId, ui64 pDiskCategory) : TPDiskConfig({}, pDiskGuid, pdiskId, pDiskCategory) {} diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl.cpp b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl.cpp index 9093d599191e..346c4a4f74c4 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl.cpp +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl.cpp @@ -48,7 +48,7 @@ TPDisk::TPDisk(const TIntrusivePtr cfg, const TIntrusivePtr<::NMon , BlockDevice(CreateRealBlockDevice(cfg->GetDevicePath(), cfg->PDiskId, Mon, HPCyclesMs(ReorderingMs), DriveModel.SeekTimeNs(), cfg->DeviceInFlight, TDeviceMode::LockFile | (cfg->UseSpdkNvmeDriver ? TDeviceMode::UseSpdk : 0), - cfg->MaxQueuedCompletionActions, cfg->SectorMap, this)) + cfg->MaxQueuedCompletionActions, cfg->SectorMap, this, cfg->ReadOnly)) , Cfg(cfg) , CreationTime(TInstant::Now()) , ExpectedSlotCount(cfg->ExpectedSlotCount) @@ -1737,14 +1737,14 @@ void TPDisk::WriteDiskFormat(ui64 diskSizeBytes, ui32 sectorSizeBytes, ui32 user // Owner initialization //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -void TPDisk::ReplyErrorYardInitResult(TYardInit &evYardInit, const TString &str) { +void TPDisk::ReplyErrorYardInitResult(TYardInit &evYardInit, const TString &str, NKikimrProto::EReplyStatus status) { TStringStream error; error << "PDiskId# " << PDiskId << " YardInit error for VDiskId# " << evYardInit.VDisk.ToStringWOGeneration() << " reason# " << str; LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_PDISK, error.Str()); ui64 writeBlockSize = ForsetiOpPieceSizeCached; ui64 readBlockSize = ForsetiOpPieceSizeCached; - ActorSystem->Send(evYardInit.Sender, new NPDisk::TEvYardInitResult(NKikimrProto::ERROR, + ActorSystem->Send(evYardInit.Sender, new NPDisk::TEvYardInitResult(status, DriveModel.SeekTimeNs() / 1000ull, DriveModel.Speed(TDriveModel::OP_TYPE_READ), DriveModel.Speed(TDriveModel::OP_TYPE_WRITE), readBlockSize, writeBlockSize, DriveModel.BulkWriteBlockSize(), @@ -1779,8 +1779,12 @@ bool TPDisk::YardInitForKnownVDisk(TYardInit &evYardInit, TOwner owner) { ADD_RECORD_WITH_TIMESTAMP_TO_OPERATION_LOG(ownerData.OperationLog, "YardInitForKnownVDisk, OwnerId# " << owner << ", evYardInit# " << evYardInit.ToString()); - TFirstUncommitted firstUncommitted = CommonLogger->FirstUncommitted.load(); - ownerData.LogEndPosition = TOwnerData::TLogEndPosition(firstUncommitted.ChunkIdx, firstUncommitted.SectorIdx); + if (Cfg->ReadOnly) { + ownerData.LogEndPosition = TOwnerData::TLogEndPosition(LastInitialChunkIdx, LastInitialSectorIdx); + } else { + TFirstUncommitted firstUncommitted = CommonLogger->FirstUncommitted.load(); + ownerData.LogEndPosition = TOwnerData::TLogEndPosition(firstUncommitted.ChunkIdx, firstUncommitted.SectorIdx); + } ownerData.OwnerRound = evYardInit.OwnerRound; TOwnerRound ownerRound = evYardInit.OwnerRound; @@ -1904,6 +1908,11 @@ void TPDisk::YardInitFinish(TYardInit &evYardInit) { return; } + if (Cfg->ReadOnly) { + ReplyErrorYardInitResult(evYardInit, "PDisk is in ReadOnly mode. Marker# BPD47", NKikimrProto::CORRUPTED); + return; + } + // Make sure owner round never decreases // Allocate quota for the owner // TODO(cthulhu): don't allocate more owners than expected @@ -3455,6 +3464,16 @@ void TPDisk::EnqueueAll() { while (InputQueue.GetWaitingSize() > 0) { TRequestBase* request = InputQueue.Pop(); + + if (Cfg->ReadOnly && HandleReadOnlyIfWrite(request)) { + LOG_DEBUG(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 " ReqId# %" PRIu64 + " got write request in ReadOnly mode type# %" PRIu64, + (ui32)PDiskId, (ui64)request->ReqId.Id, (ui32)request->GetType()); + + delete request; + return; + } + AtomicSub(InputQueueCost, request->Cost); if (IsQueuePaused) { if (IsQueueStep) { @@ -3736,6 +3755,70 @@ void TPDisk::UpdateMinLogCostNs() { } } +// Handles write requests (only in read-only mode). Returns true, if request is a write request. +bool TPDisk::HandleReadOnlyIfWrite(TRequestBase *request) { + const TActorId& sender = request->Sender; + TString errorReason = "PDisk is in read-only mode"; + + switch (request->GetType()) { + // Reads and other operations that can be processed in read-only mode. + case ERequestType::RequestLogRead: + case ERequestType::RequestLogReadContinue: + case ERequestType::RequestLogReadResultProcess: + case ERequestType::RequestLogSectorRestore: + case ERequestType::RequestChunkRead: + case ERequestType::RequestChunkReadPiece: + case ERequestType::RequestYardInit: + case ERequestType::RequestCheckSpace: + case ERequestType::RequestHarakiri: + case ERequestType::RequestYardSlay: + case ERequestType::RequestYardControl: + case ERequestType::RequestWhiteboartReport: + case ERequestType::RequestHttpInfo: + case ERequestType::RequestStopDevice: + case ERequestType::RequestUndelivered: + case ERequestType::RequestNop: + case ERequestType::RequestConfigureScheduler: + return false; + + // Can't be processed in read-only mode. + case ERequestType::RequestLogWrite: { + TLogWrite &ev = *static_cast(request); + NPDisk::TEvLogResult* result = new NPDisk::TEvLogResult(NKikimrProto::CORRUPTED, 0, errorReason); + result->Results.push_back(NPDisk::TEvLogResult::TRecord(ev.Lsn, ev.Cookie)); + ActorSystem->Send(sender, result); + return true; + } + case ERequestType::RequestChunkWrite: { + TChunkWrite &ev = *static_cast(request); + SendChunkWriteError(ev, errorReason, NKikimrProto::CORRUPTED); + return true; + } + case ERequestType::RequestChunkReserve: + ActorSystem->Send(sender, new NPDisk::TEvChunkReserveResult(NKikimrProto::CORRUPTED, 0, errorReason)); + return true; + case ERequestType::RequestChunkLock: + ActorSystem->Send(sender, new NPDisk::TEvChunkLockResult(NKikimrProto::CORRUPTED, {}, 0, errorReason)); + return true; + case ERequestType::RequestChunkUnlock: + ActorSystem->Send(sender, new NPDisk::TEvChunkUnlockResult(NKikimrProto::CORRUPTED, 0, errorReason)); + return true; + case ERequestType::RequestChunkForget: + ActorSystem->Send(sender, new NPDisk::TEvChunkForgetResult(NKikimrProto::CORRUPTED, 0, errorReason)); + return true; + + case ERequestType::RequestTryTrimChunk: + case ERequestType::RequestReleaseChunks: + case ERequestType::RequestChunkWritePiece: + case ERequestType::RequestChunkTrim: + case ERequestType::RequestAskForCutLog: + case ERequestType::RequestCommitLogChunks: + case ERequestType::RequestLogCommitDone: + // These requests don't require response. + return true; + } +} + void TPDisk::AddCbs(ui32 ownerId, EGate gate, const char *gateName, ui64 minBudget) { if (!ForsetiScheduler.GetCbs(ownerId, gate)) { NSchLab::TCbs cbs; diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl.h index a120f3773202..c4f0f5013573 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl.h @@ -177,6 +177,10 @@ class TPDisk : public IPDisk { TIntrusivePtr Cfg; TInstant CreationTime; + // Last chunk and sector indexes we have seen on initial log read. + // Used to limit log reading in read-only mode. + ui32 LastInitialChunkIdx; + ui64 LastInitialSectorIdx; ui64 ExpectedSlotCount = 0; // Number of slots to use for space limit calculation. @@ -320,7 +324,7 @@ class TPDisk : public IPDisk { TString textMessage, const bool isErasureEncodeUserLog, const bool trimEntireDevice); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Owner initialization - void ReplyErrorYardInitResult(TYardInit &evYardInit, const TString &str); + void ReplyErrorYardInitResult(TYardInit &evYardInit, const TString &str, NKikimrProto::EReplyStatus status = NKikimrProto::ERROR); TOwner FindNextOwnerId(); bool YardInitStart(TYardInit &evYardInit); void YardInitFinish(TYardInit &evYardInit); @@ -389,6 +393,7 @@ class TPDisk : public IPDisk { void AddCbs(ui32 ownerId, EGate gate, const char *gateName, ui64 minBudget); void AddCbsSet(ui32 ownerId); void UpdateMinLogCostNs(); + bool HandleReadOnlyIfWrite(TRequestBase *request); }; void ParsePayloadFromSectorOffset(const TDiskFormat& format, ui64 firstSector, ui64 lastSector, ui64 currentSector, diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl_http.cpp b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl_http.cpp index cec8e9a0b556..680dc1a7c632 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl_http.cpp +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl_http.cpp @@ -25,18 +25,20 @@ void TPDisk::RenderState(IOutputStream &str, THttpInfo &httpInfo) { TABLEBODY() { TABLER() { TABLED() {str << "PDisk";} + TString stateStr = TStringBuilder() << TPDiskMon::TPDisk::StateToStr(Mon.PDiskState->Val()) << (Cfg->ReadOnly ? " (readonly)" : ""); + TString briefStateStr = TPDiskMon::TPDisk::BriefStateToStr(Mon.PDiskBriefState->Val()); switch(Mon.PDiskBriefState->Val()) { case TPDiskMon::TPDisk::OK: - TABLED() {GREEN_TEXT(str, TPDiskMon::TPDisk::StateToStr(Mon.PDiskState->Val()));} - TABLED() {GREEN_TEXT(str, TPDiskMon::TPDisk::BriefStateToStr(Mon.PDiskBriefState->Val()));} + TABLED() {GREEN_TEXT(str, stateStr);} + TABLED() {GREEN_TEXT(str, briefStateStr);} break; case TPDiskMon::TPDisk::Booting: - TABLED() {YELLOW_TEXT(str, TPDiskMon::TPDisk::StateToStr(Mon.PDiskState->Val()));} - TABLED() {YELLOW_TEXT(str, TPDiskMon::TPDisk::BriefStateToStr(Mon.PDiskBriefState->Val()));} + TABLED() {YELLOW_TEXT(str, stateStr);} + TABLED() {YELLOW_TEXT(str, briefStateStr);} break; case TPDiskMon::TPDisk::Error: - TABLED() {RED_TEXT(str, TPDiskMon::TPDisk::StateToStr(Mon.PDiskState->Val()));} - TABLED() {RED_TEXT(str, TPDiskMon::TPDisk::BriefStateToStr(Mon.PDiskBriefState->Val()));} + TABLED() {RED_TEXT(str, stateStr);} + TABLED() {RED_TEXT(str, briefStateStr);} break; } TABLED() {str << TPDiskMon::TPDisk::DetailedStateToStr(Mon.PDiskDetailedState->Val());} diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl_log.cpp b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl_log.cpp index 0ab90797ac24..bf4705f76be4 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl_log.cpp +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl_log.cpp @@ -35,7 +35,7 @@ class TLogFlushCompletionAction : public TCompletionAction { void Exec(TActorSystem *actorSystem) override { CommonLogger->FirstUncommitted = TFirstUncommitted(EndChunkIdx, EndSectorIdx); - + SetUpCompletionLogWrite(); CompletionLogWrite->Exec(actorSystem); @@ -66,8 +66,8 @@ bool TPDisk::InitCommonLogger() { ui64 sectorIdx = (InitialLogPosition.OffsetInChunk + Format.SectorSize - 1) / Format.SectorSize; TLogChunkInfo *info = &*std::find_if(LogChunks.begin(), LogChunks.end(), [=](const TLogChunkInfo& i) { - return i.ChunkIdx == chunkIdx; - }); + return i.ChunkIdx == chunkIdx; + }); if (sectorIdx >= UsableSectorsPerLogChunk() && InitialTailBuffer) { InitialTailBuffer->Release(ActorSystem); @@ -84,7 +84,7 @@ bool TPDisk::InitCommonLogger() { } CommonLogger->SwitchToNewChunk(TReqId(TReqId::InitCommonLoggerSwitchToNewChunk, 0), nullptr); - // Log chunk can be collected as soon as noone needs it + // Log chunk can be collected as soon as no one needs it ChunkState[chunkIdx].CommitState = TChunkState::DATA_COMMITTED; } bool isOk = LogNonceJump(InitialPreviousNonce); @@ -597,14 +597,19 @@ void TPDisk::ProcessLogReadQueue() { ui32 endLogChunkIdx; ui64 endLogSectorIdx; - TOwnerData::TLogEndPosition &logEndPos = ownerData.LogEndPosition; - if (logEndPos.ChunkIdx == 0 && logEndPos.SectorIdx == 0) { - TFirstUncommitted firstUncommitted = CommonLogger->FirstUncommitted.load(); - endLogChunkIdx = firstUncommitted.ChunkIdx; - endLogSectorIdx = firstUncommitted.SectorIdx; + if (Cfg->ReadOnly) { + endLogChunkIdx = LastInitialChunkIdx; + endLogSectorIdx = LastInitialSectorIdx; } else { - endLogChunkIdx = logEndPos.ChunkIdx; - endLogSectorIdx = logEndPos.SectorIdx; + TOwnerData::TLogEndPosition &logEndPos = ownerData.LogEndPosition; + if (logEndPos.ChunkIdx == 0 && logEndPos.SectorIdx == 0) { + TFirstUncommitted firstUncommitted = CommonLogger->FirstUncommitted.load(); + endLogChunkIdx = firstUncommitted.ChunkIdx; + endLogSectorIdx = firstUncommitted.SectorIdx; + } else { + endLogChunkIdx = logEndPos.ChunkIdx; + endLogSectorIdx = logEndPos.SectorIdx; + } } ownerData.LogReader = new TLogReader(false, @@ -1416,6 +1421,10 @@ void TPDisk::ProcessReadLogResult(const NPDisk::TEvReadLogResult &evReadLogResul "Error while parsing common log at booting state")); return; } + + LastInitialChunkIdx = evReadLogResult.LastGoodChunkIdx; + LastInitialSectorIdx = evReadLogResult.LastGoodSectorIdx; + // Prepare the FreeChunks list InitFreeChunks(); // Actualize LogChunks counters according to OwnerData @@ -1509,13 +1518,17 @@ void TPDisk::ProcessReadLogResult(const NPDisk::TEvReadLogResult &evReadLogResul InitSysLogger(); InitPhase = EInitPhase::Initialized; - if (!InitCommonLogger()) { - // TODO: report red zone - *Mon.PDiskState = NKikimrBlobStorage::TPDiskState::CommonLoggerInitError; - *Mon.PDiskBriefState = TPDiskMon::TPDisk::Error; - *Mon.PDiskDetailedState = TPDiskMon::TPDisk::ErrorCommonLoggerInit; - ActorSystem->Send(pDiskActor, new TEvLogInitResult(false, "Error in common logger init")); - return; + + if (!Cfg->ReadOnly) { + // We don't need logger in ReadOnly mode. + if (!InitCommonLogger()) { + // TODO: report red zone + *Mon.PDiskState = NKikimrBlobStorage::TPDiskState::CommonLoggerInitError; + *Mon.PDiskBriefState = TPDiskMon::TPDisk::Error; + *Mon.PDiskDetailedState = TPDiskMon::TPDisk::ErrorCommonLoggerInit; + ActorSystem->Send(pDiskActor, new TEvLogInitResult(false, "Error in common logger init")); + return; + } } // Now it's ok to write both logs and data. @@ -1523,9 +1536,13 @@ void TPDisk::ProcessReadLogResult(const NPDisk::TEvReadLogResult &evReadLogResul *Mon.PDiskBriefState = TPDiskMon::TPDisk::OK; *Mon.PDiskDetailedState = TPDiskMon::TPDisk::EverythingIsOk; - auto completion = MakeHolder(this, pDiskActor, new TEvLogInitResult(true, "OK")); - ReleaseUnusedLogChunks(completion.Get()); - WriteSysLogRestorePoint(completion.Release(), TReqId(TReqId::AfterInitCommonLoggerSysLog, 0), {}); + if (Cfg->ReadOnly) { + ActorSystem->Send(pDiskActor, new TEvLogInitResult(true, "OK")); + } else { + auto completion = MakeHolder(this, pDiskActor, new TEvLogInitResult(true, "OK")); + ReleaseUnusedLogChunks(completion.Get()); + WriteSysLogRestorePoint(completion.Release(), TReqId(TReqId::AfterInitCommonLoggerSysLog, 0), {}); + } // Output the fully initialized state for each owner and each chunk. LOG_NOTICE_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_logreader.cpp b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_logreader.cpp index 906da1274522..d4250fa44073 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_logreader.cpp +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_logreader.cpp @@ -1162,6 +1162,10 @@ void TLogReader::ReplyOk() { Result->Status = NKikimrProto::OK; Result->NextPosition = IsInitial ? LastGoodToWriteLogPosition : TLogPosition::Invalid(); Result->IsEndOfLog = true; + if (IsInitial) { + Result->LastGoodChunkIdx = ChunkIdx; + Result->LastGoodSectorIdx = SectorIdx; + } Reply(); } diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut.cpp b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut.cpp index e10276810bb0..9910910cd8c2 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut.cpp +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut.cpp @@ -1112,4 +1112,81 @@ Y_UNIT_TEST_SUITE(PDiskCompatibilityInfo) { } } + +Y_UNIT_TEST_SUITE(ReadOnlyPDisk) { + Y_UNIT_TEST(SimpleRestartReadOnly) { + TActorTestContext testCtx{{}}; + + auto cfg = testCtx.GetPDiskConfig(); + cfg->ReadOnly = true; + testCtx.UpdateConfigRecreatePDisk(cfg); + } + + Y_UNIT_TEST(StartReadOnlyUnformattedShouldFail) { + TActorTestContext testCtx{{ + .ReadOnly = true, + }}; + auto res = testCtx.TestResponse( + new NPDisk::TEvYardControl(NPDisk::TEvYardControl::PDiskStart, (void*)(&testCtx.MainKey)), + NKikimrProto::CORRUPTED); + + UNIT_ASSERT_STRING_CONTAINS(res->ErrorReason, "Magic sector is not present on disk"); + } + + Y_UNIT_TEST(StartReadOnlyZeroedShouldFail) { + TActorTestContext testCtx{{ + .ReadOnly = true, + .InitiallyZeroed = true, + }}; + auto res = testCtx.TestResponse( + new NPDisk::TEvYardControl(NPDisk::TEvYardControl::PDiskStart, (void*)(&testCtx.MainKey)), + NKikimrProto::CORRUPTED); + + UNIT_ASSERT_STRING_CONTAINS(res->ErrorReason, "PDisk is in read-only mode"); + } + + Y_UNIT_TEST(VDiskStartsOnReadOnlyPDisk) { + TActorTestContext testCtx{{}}; + TVDiskMock vdisk(&testCtx); + vdisk.InitFull(); + vdisk.SendEvLogSync(); + + auto cfg = testCtx.GetPDiskConfig(); + cfg->ReadOnly = true; + testCtx.UpdateConfigRecreatePDisk(cfg); + + vdisk.Init(); // Should start ok. + vdisk.ReadLog(); // Should be able to read log. + { + // Should fail on writing log. + auto evLog = MakeHolder(vdisk.PDiskParams->Owner, vdisk.PDiskParams->OwnerRound, 0, TRcBuf(PrepareData(1)), + TLsnSeg(), nullptr); + auto res = testCtx.TestResponse(evLog.Release(), NKikimrProto::CORRUPTED); + + UNIT_ASSERT_STRING_CONTAINS(res->ErrorReason, "PDisk is in read-only mode"); + } + { + // Should fail on reserving chunk. + auto res = testCtx.TestResponse( + new NPDisk::TEvChunkReserve(vdisk.PDiskParams->Owner, vdisk.PDiskParams->OwnerRound, 1), + NKikimrProto::CORRUPTED); + + UNIT_ASSERT_STRING_CONTAINS(res->ErrorReason, "PDisk is in read-only mode"); + } + { + // Should fail on writing chunk. + TString chunkWriteData = PrepareData(1); + auto counter = MakeIntrusive<::NMonitoring::TCounterForPtr>(); + TMemoryConsumer consumer(counter); + TTrackableBuffer buffer(std::move(consumer), chunkWriteData.data(), chunkWriteData.size()); + auto res = testCtx.TestResponse( + new NPDisk::TEvChunkWrite(vdisk.PDiskParams->Owner, vdisk.PDiskParams->OwnerRound, + 0, 0, new NPDisk::TEvChunkWrite::TBufBackedUpParts(std::move(buffer)), nullptr, false, 0), + NKikimrProto::CORRUPTED); + + UNIT_ASSERT_STRING_CONTAINS(res->ErrorReason, "PDisk is in read-only mode"); + } + } +} + } // namespace NKikimr diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_env.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_env.h index ce6e2785c1d9..ee9a4d994a41 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_env.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_env.h @@ -27,6 +27,7 @@ struct TActorTestContext { bool SmallDisk = false; bool SuppressCompatibilityCheck = false; TAutoPtr LogBackend = nullptr; + bool ReadOnly = false; bool InitiallyZeroed = false; // Only for sector map. Zero first 1MiB on start. }; @@ -59,7 +60,7 @@ struct TActorTestContext { TestCtx.SectorMap->ZeroInit(1_MB / NPDisk::NSectorMap::SECTOR_SIZE); } - if (!Settings.InitiallyZeroed) { + if (!Settings.InitiallyZeroed && !Settings.ReadOnly) { if (Settings.DiskSize) { FormatPDiskForTest(path, formatGuid, Settings.ChunkSize, Settings.DiskSize, false, TestCtx.SectorMap, Settings.SmallDisk); } else { @@ -76,6 +77,7 @@ struct TActorTestContext { pDiskConfig->EnableSectorEncryption = !pDiskConfig->SectorMap; pDiskConfig->FeatureFlags.SetEnableSmallDiskOptimization(Settings.SmallDisk); pDiskConfig->FeatureFlags.SetSuppressCompatibilityCheck(Settings.SuppressCompatibilityCheck); + pDiskConfig->ReadOnly = Settings.ReadOnly; return pDiskConfig; } @@ -202,7 +204,7 @@ struct TActorTestContext { THolder evRes = Recv(); if (status.has_value()) { - UNIT_ASSERT_C(evRes->Status == status.value(), evRes->ToString()); + UNIT_ASSERT_VALUES_EQUAL_C(evRes->Status, status.value(), evRes->ToString()); } UNIT_ASSERT(evRes->Status == NKikimrProto::OK || !evRes->ErrorReason.empty()); @@ -261,6 +263,7 @@ struct TVDiskMock { const auto evInitRes = TestCtx->TestResponse( new NPDisk::TEvYardInit(OwnerRound.fetch_add(1), VDiskID, TestCtx->TestCtx.PDiskGuid), NKikimrProto::OK); + PDiskParams = evInitRes->PDiskParams; TSet commited = Chunks[EChunkState::COMMITTED]; diff --git a/ydb/core/blobstorage/pdisk/mock/pdisk_mock.cpp b/ydb/core/blobstorage/pdisk/mock/pdisk_mock.cpp index a37412dbb259..b45baab4642d 100644 --- a/ydb/core/blobstorage/pdisk/mock/pdisk_mock.cpp +++ b/ydb/core/blobstorage/pdisk/mock/pdisk_mock.cpp @@ -38,6 +38,7 @@ struct TPDiskMockState::TImpl { const ui32 ChunkSize; const ui32 TotalChunks; const ui32 AppendBlockSize; + bool IsDiskReadOnly; std::map Owners; std::set FreeChunks; ui32 NextFreeChunk = 1; @@ -48,7 +49,7 @@ struct TPDiskMockState::TImpl { TString StateErrorReason; NPDisk::EDeviceType DeviceType; - TImpl(ui32 nodeId, ui32 pdiskId, ui64 pdiskGuid, ui64 size, ui32 chunkSize, NPDisk::EDeviceType deviceType) + TImpl(ui32 nodeId, ui32 pdiskId, ui64 pdiskGuid, ui64 size, ui32 chunkSize, bool isDiskReadOnly, NPDisk::EDeviceType deviceType) : NodeId(nodeId) , PDiskId(pdiskId) , PDiskGuid(pdiskGuid) @@ -56,6 +57,7 @@ struct TPDiskMockState::TImpl { , ChunkSize(chunkSize) , TotalChunks(Size / ChunkSize) , AppendBlockSize(4096) + , IsDiskReadOnly(isDiskReadOnly) , NextFreeChunk(1) , StatusFlags(NPDisk::TStatusFlags{}) , DeviceType(deviceType) @@ -282,9 +284,9 @@ struct TPDiskMockState::TImpl { } }; -TPDiskMockState::TPDiskMockState(ui32 nodeId, ui32 pdiskId, ui64 pdiskGuid, ui64 size, ui32 chunkSize, +TPDiskMockState::TPDiskMockState(ui32 nodeId, ui32 pdiskId, ui64 pdiskGuid, ui64 size, ui32 chunkSize, bool isDiskReadOnly, NPDisk::EDeviceType deviceType) - : TPDiskMockState(std::make_unique(nodeId, pdiskId, pdiskGuid, size, chunkSize, deviceType)) + : TPDiskMockState(std::make_unique(nodeId, pdiskId, pdiskGuid, size, chunkSize, isDiskReadOnly, deviceType)) {} TPDiskMockState::TPDiskMockState(std::unique_ptr&& impl) @@ -330,6 +332,10 @@ void TPDiskMockState::SetReadOnly(const TVDiskID& vDiskId, bool isReadOnly) { Impl->SetReadOnly(vDiskId, isReadOnly); } +bool TPDiskMockState::IsDiskReadOnly() const { + return Impl->IsDiskReadOnly; +} + TString& TPDiskMockState::GetStateErrorReason() { return Impl->StateErrorReason; } @@ -841,6 +847,7 @@ class TPDiskMockActor : public TActorBootstrapped { bool restartAllowed = ev->Get()->RestartAllowed; if (restartAllowed) { + Impl.IsDiskReadOnly = ev->Get()->Config->ReadOnly; Send(ev->Sender, new TEvBlobStorage::TEvNotifyWardenPDiskRestarted(Impl.PDiskId)); } } @@ -970,6 +977,7 @@ class TPDiskMockActor : public TActorBootstrapped { hFunc(NPDisk::TEvSlay, ErrorHandle); hFunc(NPDisk::TEvChunkReserve, ErrorHandle); hFunc(NPDisk::TEvChunkForget, ErrorHandle); + hFunc(TEvBlobStorage::TEvAskWardenRestartPDiskResult, Handle); cFunc(TEvents::TSystem::Wakeup, ReportMetrics); cFunc(EvBecomeNormal, HandleMoveToNormalState); diff --git a/ydb/core/blobstorage/pdisk/mock/pdisk_mock.h b/ydb/core/blobstorage/pdisk/mock/pdisk_mock.h index 919027fe4c0f..2ab9c0e85384 100644 --- a/ydb/core/blobstorage/pdisk/mock/pdisk_mock.h +++ b/ydb/core/blobstorage/pdisk/mock/pdisk_mock.h @@ -22,7 +22,7 @@ namespace NKikimr { public: TPDiskMockState(ui32 nodeId, ui32 pdiskId, ui64 pdiskGuid, ui64 size, ui32 chunkSize = 128 << 20, - NPDisk::EDeviceType deviceType = NPDisk::EDeviceType::DEVICE_TYPE_NVME); + bool isDiskReadOnly = false, NPDisk::EDeviceType deviceType = NPDisk::EDeviceType::DEVICE_TYPE_NVME); TPDiskMockState(std::unique_ptr&& impl); ~TPDiskMockState(); @@ -40,6 +40,8 @@ namespace NKikimr { TPtr Snapshot(); // create a copy of PDisk whole state void SetReadOnly(const TVDiskID& vDiskId, bool isReadOnly); + + bool IsDiskReadOnly() const; }; IActor *CreatePDiskMockActor(TPDiskMockState::TPtr state); diff --git a/ydb/core/blobstorage/ut_blobstorage/lib/env.h b/ydb/core/blobstorage/ut_blobstorage/lib/env.h index ad829ba64eaf..dcf8a30cd7eb 100644 --- a/ydb/core/blobstorage/ut_blobstorage/lib/env.h +++ b/ydb/core/blobstorage/ut_blobstorage/lib/env.h @@ -70,7 +70,7 @@ struct TEnvironmentSetup { TIntrusivePtr& state = Env.PDiskMockStates[key]; if (!state) { state.Reset(new TPDiskMockState(nodeId, pdiskId, cfg->PDiskGuid, ui64(10) << 40, cfg->ChunkSize, - Env.Settings.DiskType)); + cfg->ReadOnly, Env.Settings.DiskType)); } const TActorId& actorId = ctx.Register(CreatePDiskMockActor(state), TMailboxType::HTSwap, poolId); const TActorId& serviceId = MakeBlobStoragePDiskID(nodeId, pdiskId); diff --git a/ydb/core/blobstorage/ut_blobstorage/read_only_pdisk.cpp b/ydb/core/blobstorage/ut_blobstorage/read_only_pdisk.cpp new file mode 100644 index 000000000000..fb0788cb4c0d --- /dev/null +++ b/ydb/core/blobstorage/ut_blobstorage/read_only_pdisk.cpp @@ -0,0 +1,368 @@ +#include +#include +#include + +Y_UNIT_TEST_SUITE(BSCReadOnlyPDisk) { + + Y_UNIT_TEST(ReadOnlyNotAllowed) { + TEnvironmentSetup env({ + .NodeCount = 10, + .Erasure = TBlobStorageGroupType::Erasure4Plus2Block + }); + + std::unordered_map diskGuids; + + { + env.CreateBoxAndPool(1, 10); + + env.Sim(TDuration::Seconds(30)); + + auto config = env.FetchBaseConfig(); + + for (const NKikimrBlobStorage::TBaseConfig::TPDisk& pdisk : config.GetPDisk()) { + TPDiskId diskId(pdisk.GetNodeId(), pdisk.GetPDiskId()); + + diskGuids[diskId] = pdisk.GetGuid(); + } + + env.Sim(TDuration::Seconds(30)); + } + + int i = 0; + auto it = diskGuids.begin(); + + for (; it != diskGuids.end(); it++, i++) { + auto& diskId = it->first; + + NKikimrBlobStorage::TConfigRequest request; + request.SetIgnoreDegradedGroupsChecks(true); + + NKikimrBlobStorage::TSetPDiskReadOnly* cmd = request.AddCommand()->MutableSetPDiskReadOnly(); + auto pdiskId = cmd->MutableTargetPDiskId(); + cmd->SetValue(true); + pdiskId->SetNodeId(diskId.NodeId); + pdiskId->SetPDiskId(diskId.PDiskId); + + auto response = env.Invoke(request); + + if (i < 2) { + // Two disks can be set ReadOnly. + UNIT_ASSERT_C(response.GetSuccess(), response.GetErrorDescription()); + } else { + // Restarting third disk will not be allowed. + UNIT_ASSERT_C(!response.GetSuccess(), "Restart should've been prohibited"); + + UNIT_ASSERT_STRING_CONTAINS(response.GetErrorDescription(), "Disintegrated"); + break; + } + } + } + + class TDummyActor : public TActor { + public: + TDummyActor() : TActor(&TThis::StateFunc) {} + + void StateFunc(TAutoPtr& ev) { + Y_UNUSED(ev); + } + }; + + void Invoke(TEnvironmentSetup& env, const NKikimrBlobStorage::TConfigRequest& request) { + TActorId actorId = env.Runtime->Register(new TDummyActor(), TActorId(), 0, std::nullopt, env.Settings.ControllerNodeId); + auto ev = std::make_unique(); + ev->Record.MutableRequest()->CopyFrom(request); + env.Runtime->SendToPipe(env.TabletId, actorId, ev.release(), 0, TTestActorSystem::GetPipeConfigWithRetries()); + } + + Y_UNIT_TEST(RestartAndReadOnlyConsecutive) { + // This test ensures that restart that sets disk to read-only is not lost when regular restart is in progress. + TEnvironmentSetup env({ + .NodeCount = 10, + .Erasure = TBlobStorageGroupType::Erasure4Plus2Block + }); + + std::unordered_map diskGuids; + + { + env.CreateBoxAndPool(1, 10); + + env.Sim(TDuration::Seconds(30)); + + auto config = env.FetchBaseConfig(); + + for (const NKikimrBlobStorage::TBaseConfig::TPDisk& pdisk : config.GetPDisk()) { + TPDiskId diskId(pdisk.GetNodeId(), pdisk.GetPDiskId()); + + diskGuids[diskId] = pdisk.GetGuid(); + } + + env.Sim(TDuration::Seconds(30)); + } + + auto& diskId = diskGuids.begin()->first; + + { + NKikimrBlobStorage::TConfigRequest request; + request.SetIgnoreDegradedGroupsChecks(true); + + NKikimrBlobStorage::TRestartPDisk* cmd = request.AddCommand()->MutableRestartPDisk(); + auto pdiskId = cmd->MutableTargetPDiskId(); + pdiskId->SetNodeId(diskId.NodeId); + pdiskId->SetPDiskId(diskId.PDiskId); + + Invoke(env, request); + } + + { + NKikimrBlobStorage::TConfigRequest request; + request.SetIgnoreDegradedGroupsChecks(true); + + NKikimrBlobStorage::TSetPDiskReadOnly* cmd = request.AddCommand()->MutableSetPDiskReadOnly(); + auto pdiskId = cmd->MutableTargetPDiskId(); + cmd->SetValue(true); + pdiskId->SetNodeId(diskId.NodeId); + pdiskId->SetPDiskId(diskId.PDiskId); + + auto response = env.Invoke(request); + + UNIT_ASSERT_C(response.GetSuccess(), response.GetErrorDescription()); + } + + TInstant barrier = env.Runtime->GetClock() + TDuration::Minutes(5); + + bool gotReport = false; + + env.Runtime->Sim([&] { return env.Runtime->GetClock() <= barrier && !gotReport; }, [&](IEventHandle &witnessedEvent) { + switch (witnessedEvent.GetTypeRewrite()) { + case TEvBlobStorage::TEvControllerNodeReport::EventType: { + auto *report = witnessedEvent.Get(); + if (report) { + auto& reports = report->Record.GetPDiskReports(); + UNIT_ASSERT_VALUES_EQUAL(1, reports.size()); + auto& report = reports[0]; + auto pdiskId = report.GetPDiskId(); + auto phase = report.GetPhase(); + UNIT_ASSERT_VALUES_EQUAL(diskId.PDiskId, pdiskId); + UNIT_ASSERT_EQUAL(NKikimrBlobStorage::TEvControllerNodeReport::PD_RESTARTED, phase); + gotReport = true; + } + break; + } + } + }); + + UNIT_ASSERT(gotReport); + + auto stateIt = env.PDiskMockStates.find(std::pair(diskId.NodeId, diskId.PDiskId)); + + UNIT_ASSERT(stateIt != env.PDiskMockStates.end()); + + UNIT_ASSERT(stateIt->second->IsDiskReadOnly()); + } + + Y_UNIT_TEST(ReadOnlyOneByOne) { + TEnvironmentSetup env({ + .NodeCount = 10, + .Erasure = TBlobStorageGroupType::Erasure4Plus2Block + }); + + std::unordered_map diskGuids; + + { + env.CreateBoxAndPool(1, 10); + + env.Sim(TDuration::Seconds(30)); + + auto config = env.FetchBaseConfig(); + + for (const NKikimrBlobStorage::TBaseConfig::TPDisk& pdisk : config.GetPDisk()) { + TPDiskId diskId(pdisk.GetNodeId(), pdisk.GetPDiskId()); + + diskGuids[diskId] = pdisk.GetGuid(); + } + + env.Sim(TDuration::Seconds(30)); + } + + int i = 0; + auto it = diskGuids.begin(); + + for (; it != diskGuids.end(); it++, i++) { + auto& diskId = it->first; + + for (auto val : {true, false}) { + NKikimrBlobStorage::TConfigRequest request; + request.SetIgnoreDegradedGroupsChecks(true); + + NKikimrBlobStorage::TSetPDiskReadOnly* cmd = request.AddCommand()->MutableSetPDiskReadOnly(); + auto pdiskId = cmd->MutableTargetPDiskId(); + cmd->SetValue(val); + pdiskId->SetNodeId(diskId.NodeId); + pdiskId->SetPDiskId(diskId.PDiskId); + + Invoke(env, request); + + TInstant barrier = env.Runtime->GetClock() + TDuration::Minutes(5); + + bool gotServiceSetUpdate = false; + bool gotConfigResponse = false; + env.Runtime->Sim([&] { return env.Runtime->GetClock() <= barrier && (!gotServiceSetUpdate || !gotConfigResponse); }, [&](IEventHandle &witnessedEvent) { + switch (witnessedEvent.GetTypeRewrite()) { + case TEvBlobStorage::TEvControllerNodeServiceSetUpdate::EventType: { + auto *serviceSetUpdate = witnessedEvent.Get(); + if (serviceSetUpdate) { + const auto &pdisks = serviceSetUpdate->Record.GetServiceSet().GetPDisks(); + const auto &pdisk = pdisks[0]; + UNIT_ASSERT_EQUAL(NKikimrBlobStorage::INITIAL, pdisk.GetEntityStatus()); + UNIT_ASSERT_VALUES_EQUAL(val, pdisk.GetReadOnly()); + gotServiceSetUpdate = true; + } + break; + } + case TEvBlobStorage::TEvControllerConfigResponse::EventType: { + auto *configResponse = witnessedEvent.Get(); + if (configResponse) { + const auto &response = configResponse->Record.GetResponse(); + UNIT_ASSERT_C(response.GetSuccess(), response.GetErrorDescription()); + gotConfigResponse = true; + } + break; + } + } + }); + + UNIT_ASSERT(gotServiceSetUpdate); + UNIT_ASSERT(gotConfigResponse); + + // Wait for VSlot to become ready after PDisk restart due to ReadOnly status being changed. + env.Sim(TDuration::Seconds(30)); + } + } + } + + auto GetGroupVDisks(TEnvironmentSetup& env) { + struct TVDisk { + ui32 NodeId; + ui32 PDiskId; + ui32 VSlotId; + TVDiskID VDiskId; + }; + + std::vector vdisks; + + auto config = env.FetchBaseConfig(); + + auto& group = config.get_idx_group(0); + + for (auto& vslot : config.GetVSlot()) { + if (group.GetGroupId() == vslot.GetGroupId()) { + auto slotId = vslot.GetVSlotId(); + auto nodeId = slotId.GetNodeId(); + auto pdiskId = slotId.GetPDiskId(); + auto vdiskId = TVDiskID(group.GetGroupId(), group.GetGroupGeneration(), vslot.GetFailRealmIdx(), vslot.GetFailDomainIdx(), vslot.GetVDiskIdx()); + vdisks.push_back({nodeId, pdiskId, slotId.GetVSlotId(), vdiskId}); + } + } + + return vdisks; + } + + Y_UNIT_TEST(SetBrokenDiskInBrokenGroupReadOnly) { + TEnvironmentSetup env({ + .NodeCount = 8, + .Erasure = TBlobStorageGroupType::Erasure4Plus2Block + }); + + env.UpdateSettings(false, false); + env.CreateBoxAndPool(1, 1); + env.Sim(TDuration::Seconds(30)); + + auto vdisks = GetGroupVDisks(env); + + // Making all vdisks bad, group is disintegrated + const TActorId sender = env.Runtime->AllocateEdgeActor(env.Settings.ControllerNodeId, __FILE__, __LINE__); + for (auto& pdisk : env.PDiskActors) { + env.Runtime->WrapInActorContext(sender, [&] () { + env.Runtime->Send(new IEventHandle(EvBecomeError, 0, pdisk, sender, nullptr, 0)); + }); + } + + env.Sim(TDuration::Minutes(1)); + + // Restarting the owner of an already broken disk in a broken group must be allowed + auto& [targetNodeId, targetPDiskId, unused1, unused2] = vdisks[0]; + + NKikimrBlobStorage::TConfigRequest request; + + NKikimrBlobStorage::TSetPDiskReadOnly* cmd = request.AddCommand()->MutableSetPDiskReadOnly(); + auto pdiskId = cmd->MutableTargetPDiskId(); + cmd->SetValue(true); + pdiskId->SetNodeId(targetNodeId); + pdiskId->SetPDiskId(targetPDiskId); + + auto response = env.Invoke(request); + UNIT_ASSERT_C(response.GetSuccess(), response.GetErrorDescription()); + + // Wait until pdisk restarts and node warden sends "pdisk restarted" to BSC. + TInstant barrier = env.Runtime->GetClock() + TDuration::Seconds(30); + bool gotPdiskReport = false; + env.Runtime->Sim([&] { return env.Runtime->GetClock() <= barrier && !gotPdiskReport; }, [&](IEventHandle &witnessedEvent) { + if (witnessedEvent.GetTypeRewrite() == TEvBlobStorage::TEvControllerNodeReport::EventType) { + auto *nodeReport = witnessedEvent.Get(); + if (nodeReport) { + const auto &pdisks = nodeReport->Record.GetPDiskReports(); + const auto &pdisk = pdisks[0]; + UNIT_ASSERT(pdisk.GetPhase() == NKikimrBlobStorage::TEvControllerNodeReport_EPDiskPhase_PD_RESTARTED); + gotPdiskReport = true; + } + } + }); + + UNIT_ASSERT(gotPdiskReport); + } + + Y_UNIT_TEST(SetGoodDiskInBrokenGroupReadOnlyNotAllowed) { + TEnvironmentSetup env({ + .NodeCount = 8, + .Erasure = TBlobStorageGroupType::Erasure4Plus2Block + }); + + env.UpdateSettings(false, false); + env.CreateBoxAndPool(1, 1); + env.Sim(TDuration::Seconds(30)); + + // Making all but one vdisks bad, group is disintegrated + const TActorId sender = env.Runtime->AllocateEdgeActor(env.Settings.ControllerNodeId, __FILE__, __LINE__); + for (size_t i = 0; i < env.PDiskActors.size() - 1; i++) { + env.Runtime->WrapInActorContext(sender, [&] () { + env.Runtime->Send(new IEventHandle(EvBecomeError, 0, env.PDiskActors[i], sender, nullptr, 0)); + }); + } + + env.Sim(TDuration::Minutes(1)); + + ui32 targetNodeId = 0; + ui32 targetPDiskId = 0; + + for (auto& [k, v] : env.PDiskMockStates) { + if (v.Get()->GetStateErrorReason().empty()) { + targetNodeId = k.first; + targetPDiskId = k.second; + } + } + + // However making the owner of a single good disk ReadOnly must be prohibited + NKikimrBlobStorage::TConfigRequest request; + + NKikimrBlobStorage::TSetPDiskReadOnly* cmd = request.AddCommand()->MutableSetPDiskReadOnly(); + auto pdiskId = cmd->MutableTargetPDiskId(); + cmd->SetValue(true); + pdiskId->SetNodeId(targetNodeId); + pdiskId->SetPDiskId(targetPDiskId); + + auto response = env.Invoke(request); + + UNIT_ASSERT_C(!response.GetSuccess(), "ReadOnly should've been prohibited"); + UNIT_ASSERT_STRING_CONTAINS(response.GetErrorDescription(), "Disintegrated"); + } +} diff --git a/ydb/core/blobstorage/ut_blobstorage/ut_read_only_pdisk/ya.make b/ydb/core/blobstorage/ut_blobstorage/ut_read_only_pdisk/ya.make new file mode 100644 index 000000000000..922c9846f3e4 --- /dev/null +++ b/ydb/core/blobstorage/ut_blobstorage/ut_read_only_pdisk/ya.make @@ -0,0 +1,15 @@ +UNITTEST_FOR(ydb/core/blobstorage/ut_blobstorage) + + FORK_SUBTESTS() + + SIZE(MEDIUM) + + SRCS( + read_only_pdisk.cpp + ) + + PEERDIR( + ydb/core/blobstorage/ut_blobstorage/lib + ) + +END() diff --git a/ydb/core/blobstorage/ut_blobstorage/ya.make b/ydb/core/blobstorage/ut_blobstorage/ya.make index ba965e9e83ca..61ee96dd2e2c 100644 --- a/ydb/core/blobstorage/ut_blobstorage/ya.make +++ b/ydb/core/blobstorage/ut_blobstorage/ya.make @@ -68,4 +68,5 @@ RECURSE_FOR_TESTS( ut_scrub ut_vdisk_restart ut_restart_pdisk + ut_read_only_pdisk ) diff --git a/ydb/core/blobstorage/ut_vdisk/lib/prepare.cpp b/ydb/core/blobstorage/ut_vdisk/lib/prepare.cpp index c1558816d674..72d30bc3bc45 100644 --- a/ydb/core/blobstorage/ut_vdisk/lib/prepare.cpp +++ b/ydb/core/blobstorage/ut_vdisk/lib/prepare.cpp @@ -162,6 +162,7 @@ void TAllPDisks::ActorSetupCmd(NActors::TActorSystemSetup *setup, ui32 node, TPDiskCategory(deviceType, 0).GetRaw())); pDiskConfig->GetDriveDataSwitch = NKikimrBlobStorage::TPDiskConfig::DoNotTouch; pDiskConfig->WriteCacheSwitch = NKikimrBlobStorage::TPDiskConfig::DoNotTouch; + pDiskConfig->ReadOnly = inst.ReadOnly; const NPDisk::TMainKey mainKey{ .Keys = { NPDisk::YdbDefaultPDiskSequence }, .IsInitialized = true }; TActorSetupCmd pDiskSetup(CreatePDisk(pDiskConfig.Get(), mainKey, counters), TMailboxType::Revolving, 0); @@ -249,7 +250,7 @@ bool TDefaultVDiskSetup::SetUp(TAllVDisks::TVDiskInstance &vdisk, TAllPDisks *pd NKikimr::TVDiskConfig::TBaseInfo baseInfo(vdisk.VDiskID, pdisk.PDiskActorID, pdisk.PDiskGuid, pdisk.PDiskID, NKikimr::NPDisk::DEVICE_TYPE_ROT, slotId, - NKikimrBlobStorage::TVDiskKind::Default, initOwnerRound, {}); + NKikimrBlobStorage::TVDiskKind::Default, initOwnerRound, {}, false, {}, 0, 0, pdisk.ReadOnly); vdisk.Cfg = MakeIntrusive(baseInfo); for (auto &modifier : ConfigModifiers) { diff --git a/ydb/core/blobstorage/ut_vdisk/lib/prepare.h b/ydb/core/blobstorage/ut_vdisk/lib/prepare.h index 56353d2de6b0..ed6c85f03380 100644 --- a/ydb/core/blobstorage/ut_vdisk/lib/prepare.h +++ b/ydb/core/blobstorage/ut_vdisk/lib/prepare.h @@ -51,6 +51,7 @@ struct TOnePDisk { const TString Filename; const ui32 ChunkSize; const ui64 DiskSize; + bool ReadOnly = false; TOnePDisk(ui32 pDiskId, ui64 pDiskGuid, const TString &filename, ui32 chunkSize, ui64 diskSize); diff --git a/ydb/core/blobstorage/ut_vdisk/vdisk_test.cpp b/ydb/core/blobstorage/ut_vdisk/vdisk_test.cpp index 422b139e9fd1..51ab6897fc21 100644 --- a/ydb/core/blobstorage/ut_vdisk/vdisk_test.cpp +++ b/ydb/core/blobstorage/ut_vdisk/vdisk_test.cpp @@ -712,6 +712,28 @@ Y_UNIT_TEST_SUITE(TBsVDiskRepl1) { Conf.Shutdown(); UNIT_ASSERT(success2); } + + Y_UNIT_TEST(ReadOnly) { + TSmallCommonDataSet dataSet; + ui32 domainsNum = 1u; + ui32 disksInDomain = 1u; + TConfiguration Conf(TAllPDisksConfiguration::MkManyTmp(1, 512u << 10u, 16ull << 30ull, "ROT"), + domainsNum, disksInDomain, NKikimr::TBlobStorageGroupType::ErasureNone); + TFastVDiskSetup vdiskSetup; + Conf.Prepare(&vdiskSetup); + TTestReplDataWriteAndSync testLoad(&dataSet); + bool success1 = Conf.Run(&testLoad, TIMEOUT); + UNIT_ASSERT(success1); + Conf.Shutdown(); + TOnePDisk &inst = Conf.PDisks->Get(1); + inst.ReadOnly = true; + + Conf.Prepare(&vdiskSetup, false); + TReadUntilSuccess testRead(&dataSet, 0, SMALL_TIMEOUT); + bool success = Conf.Run(&testRead, TIMEOUT); + UNIT_ASSERT(success); + Conf.Shutdown(); + } } Y_UNIT_TEST_SUITE(TBsVDiskRepl2) { diff --git a/ydb/core/mind/bscontroller/bsc.cpp b/ydb/core/mind/bscontroller/bsc.cpp index b56b6dfe11f4..4e7b26f9bdcd 100644 --- a/ydb/core/mind/bscontroller/bsc.cpp +++ b/ydb/core/mind/bscontroller/bsc.cpp @@ -69,7 +69,7 @@ void TBlobStorageController::TGroupInfo::CalculateGroupStatus() { TBlobStorageGroupInfo::TGroupVDisks failed(Topology.get()); TBlobStorageGroupInfo::TGroupVDisks failedByPDisk(Topology.get()); for (const TVSlotInfo *slot : VDisksInGroup) { - if (!slot->IsReady || slot->PDisk->Mood == TPDiskMood::Restarting) { + if (!slot->IsReady) { failed |= {Topology.get(), slot->GetShortVDiskId()}; } else if (!slot->PDisk->HasGoodExpectedStatus()) { failedByPDisk |= {Topology.get(), slot->GetShortVDiskId()}; @@ -462,6 +462,7 @@ ui32 TBlobStorageController::GetEventPriority(IEventHandle *ev) { case NKikimrBlobStorage::TConfigRequest::TCommand::kCancelVirtualGroup: case NKikimrBlobStorage::TConfigRequest::TCommand::kSetVDiskReadOnly: case NKikimrBlobStorage::TConfigRequest::TCommand::kRestartPDisk: + case NKikimrBlobStorage::TConfigRequest::TCommand::kSetPDiskReadOnly: return 2; // read-write commands go with higher priority as they are needed to keep cluster intact case NKikimrBlobStorage::TConfigRequest::TCommand::kReadHostConfig: diff --git a/ydb/core/mind/bscontroller/cmds_box.cpp b/ydb/core/mind/bscontroller/cmds_box.cpp index 4b7f53c6beb0..3311f2b75456 100644 --- a/ydb/core/mind/bscontroller/cmds_box.cpp +++ b/ydb/core/mind/bscontroller/cmds_box.cpp @@ -222,4 +222,33 @@ namespace NKikimr::NBsController { } } + void TBlobStorageController::TConfigState::ExecuteStep(const NKikimrBlobStorage::TSetPDiskReadOnly& cmd, TStatus& /*status*/) { + auto targetPDiskId = cmd.GetTargetPDiskId(); + + TPDiskId pdiskId(targetPDiskId.GetNodeId(), targetPDiskId.GetPDiskId()); + + TPDiskInfo *pdisk = PDisks.FindForUpdate(pdiskId); + + if (!pdisk) { + throw TExPDiskNotFound(pdiskId.NodeId, pdiskId.PDiskId); + } + + if (cmd.GetValue()) { + pdisk->Mood = TPDiskMood::ReadOnly; + + for (const auto& [id, slot] : pdisk->VSlotsOnPDisk) { + if (slot->Group) { + auto *m = VSlots.FindForUpdate(slot->VSlotId); + m->VDiskStatus = NKikimrBlobStorage::EVDiskStatus::ERROR; + m->IsReady = false; + TGroupInfo *group = Groups.FindForUpdate(slot->Group->ID); + GroupFailureModelChanged.insert(slot->Group->ID); + group->CalculateGroupStatus(); + } + } + } else { + pdisk->Mood = TPDiskMood::Normal; + } + } + } // namespace NKikimr::NBsController diff --git a/ydb/core/mind/bscontroller/config.cpp b/ydb/core/mind/bscontroller/config.cpp index b63680720525..66ff14b2850e 100644 --- a/ydb/core/mind/bscontroller/config.cpp +++ b/ydb/core/mind/bscontroller/config.cpp @@ -117,6 +117,9 @@ namespace NKikimr::NBsController { case NBsController::TPDiskMood::EValue::Restarting: pdisk->SetEntityStatus(NKikimrBlobStorage::RESTART); break; + case NBsController::TPDiskMood::EValue::ReadOnly: + pdisk->SetReadOnly(true); + break; } return pdisk; diff --git a/ydb/core/mind/bscontroller/config.h b/ydb/core/mind/bscontroller/config.h index 8c1bb321e5ec..1683b68da2dc 100644 --- a/ydb/core/mind/bscontroller/config.h +++ b/ydb/core/mind/bscontroller/config.h @@ -315,6 +315,7 @@ namespace NKikimr { void ExecuteStep(const NKikimrBlobStorage::TCancelVirtualGroup& cmd, TStatus& status); void ExecuteStep(const NKikimrBlobStorage::TSetVDiskReadOnly& cmd, TStatus& status); void ExecuteStep(const NKikimrBlobStorage::TRestartPDisk& cmd, TStatus& status); + void ExecuteStep(const NKikimrBlobStorage::TSetPDiskReadOnly& cmd, TStatus& status); }; } // NBsController diff --git a/ydb/core/mind/bscontroller/config_cmd.cpp b/ydb/core/mind/bscontroller/config_cmd.cpp index ca5e9f91da25..e5cc0c73b010 100644 --- a/ydb/core/mind/bscontroller/config_cmd.cpp +++ b/ydb/core/mind/bscontroller/config_cmd.cpp @@ -352,6 +352,7 @@ namespace NKikimr::NBsController { HANDLE_COMMAND(CancelVirtualGroup) HANDLE_COMMAND(SetVDiskReadOnly) HANDLE_COMMAND(RestartPDisk) + HANDLE_COMMAND(SetPDiskReadOnly) case NKikimrBlobStorage::TConfigRequest::TCommand::kAddMigrationPlan: case NKikimrBlobStorage::TConfigRequest::TCommand::kDeleteMigrationPlan: diff --git a/ydb/core/mind/bscontroller/mood.h b/ydb/core/mind/bscontroller/mood.h index ccb41e9a3870..be1baf098c9b 100644 --- a/ydb/core/mind/bscontroller/mood.h +++ b/ydb/core/mind/bscontroller/mood.h @@ -33,7 +33,8 @@ struct TMood { struct TPDiskMood { enum EValue : ui8 { Normal = 0, - Restarting = 1 + Restarting = 1, + ReadOnly = 2 }; static TString Name(const EValue value) { @@ -42,6 +43,8 @@ struct TPDiskMood { return "Normal"; case Restarting: return "Restarting"; + case ReadOnly: + return "ReadOnly"; } return Sprintf("Unknown%" PRIu64, (ui64)value); } diff --git a/ydb/core/mind/bscontroller/register_node.cpp b/ydb/core/mind/bscontroller/register_node.cpp index d0952a8bdf0b..1ec936181304 100644 --- a/ydb/core/mind/bscontroller/register_node.cpp +++ b/ydb/core/mind/bscontroller/register_node.cpp @@ -419,6 +419,9 @@ void TBlobStorageController::ReadPDisk(const TPDiskId& pdiskId, const TPDiskInfo pDisk->SetManagementStage(SerialManagementStage); pDisk->SetSpaceColorBorder(PDiskSpaceColorBorder); pDisk->SetEntityStatus(entityStatus); + if (pdisk.Mood == TPDiskMood::ReadOnly) { + pDisk->SetReadOnly(true); + } } void TBlobStorageController::ReadVSlot(const TVSlotInfo& vslot, TEvBlobStorage::TEvControllerNodeServiceSetUpdate *result) { diff --git a/ydb/core/protos/blobstorage.proto b/ydb/core/protos/blobstorage.proto index 3c4aa292d253..d02c968219cc 100644 --- a/ydb/core/protos/blobstorage.proto +++ b/ydb/core/protos/blobstorage.proto @@ -1010,6 +1010,7 @@ message TNodeWardenServiceSet { optional TSerialManagementStage.E ManagementStage = 12; optional TPDiskSpaceColor.E SpaceColorBorder = 13; + optional bool ReadOnly = 14; } message TVDisk { diff --git a/ydb/core/protos/blobstorage_config.proto b/ydb/core/protos/blobstorage_config.proto index d3769317b6f8..c9db19ca268f 100644 --- a/ydb/core/protos/blobstorage_config.proto +++ b/ydb/core/protos/blobstorage_config.proto @@ -404,6 +404,11 @@ message TRestartPDisk { NKikimrBlobStorage.TPDiskId TargetPDiskId = 1; } +message TSetPDiskReadOnly { + NKikimrBlobStorage.TPDiskId TargetPDiskId = 1; + bool Value = 2; +} + message TSetScrubPeriodicity { uint32 ScrubPeriodicity = 1; // in seconds; 0 = disable } @@ -537,6 +542,7 @@ message TConfigRequest { TCancelVirtualGroup CancelVirtualGroup = 44; TSetVDiskReadOnly SetVDiskReadOnly = 47; TRestartPDisk RestartPDisk = 48; + TSetPDiskReadOnly SetPDiskReadOnly = 49; // commands intended for internal use TReassignGroupDisk ReassignGroupDisk = 19;