Skip to content

Commit

Permalink
ReadOnly pdisk
Browse files Browse the repository at this point in the history
(cherry-picked from 9d8fcc7)
  • Loading branch information
SammyVimes committed Jan 2, 2025
1 parent 518ba6d commit 3e43a71
Show file tree
Hide file tree
Showing 33 changed files with 782 additions and 62 deletions.
3 changes: 2 additions & 1 deletion ydb/core/blobstorage/nodewarden/node_warden_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -420,7 +420,8 @@ namespace NKikimr::NStorage {
std::map<TVSlotId, TVDiskRecord> LocalVDisks;
THashMap<TActorId, TVSlotId> VDiskIdByActor;
std::map<TVSlotId, ui64> SlayInFlight;
std::set<ui32> PDiskRestartInFlight;
// PDiskId -> is another restart required after the current restart.
std::unordered_map<ui32, bool> PDiskRestartInFlight;
TIntrusiveList<TVDiskRecord, TUnreportedMetricTag> VDisksWithUnreportedMetrics;

void DestroyLocalVDisk(TVDiskRecord& vdisk);
Expand Down
9 changes: 9 additions & 0 deletions ydb/core/blobstorage/nodewarden/node_warden_mon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,15 @@ void TNodeWarden::RenderWholePage(IOutputStream& out) {
}
}
}
if (!PDiskRestartInFlight.empty()) {
DIV() {
out << "PDiskRestartInFlight# [";
for (const auto& item : PDiskRestartInFlight) {
out << "pdiskId:" << item.first << " -> needsAnotherRestart: " << item.second << ", ";
}
out << "]";
}
}

TAG(TH3) { out << "VDisks"; }
TABLE_CLASS("table oddgray") {
Expand Down
36 changes: 32 additions & 4 deletions ydb/core/blobstorage/nodewarden/node_warden_pdisk.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ namespace NKikimr::NStorage {
pdiskConfig->ExpectedSerial = pdisk.GetExpectedSerial();
}

if (pdisk.HasReadOnly()) {
pdiskConfig->ReadOnly = pdisk.GetReadOnly();
}

// Path scheme: "SectorMap:unique_name[:3000]"
// where '3000' is device size of in GiB.
if (path.Contains(":")) {
Expand Down Expand Up @@ -224,13 +228,25 @@ namespace NKikimr::NStorage {
}

void TNodeWarden::OnPDiskRestartFinished(ui32 pdiskId, NKikimrProto::EReplyStatus status) {
if (PDiskRestartInFlight.erase(pdiskId) == 0) {
auto it = PDiskRestartInFlight.find(pdiskId);
if (it == PDiskRestartInFlight.end()) {
// There was no restart in progress.
return;
}

bool requiresAnotherRestart = it->second;

PDiskRestartInFlight.erase(it);

const TPDiskKey pdiskKey(LocalNodeId, pdiskId);

if (requiresAnotherRestart) {
auto it = LocalPDisks.find(pdiskKey);
auto pdisk = it->second.Record;
DoRestartLocalPDisk(pdisk);
return;
}

const TVSlotId from(pdiskKey.NodeId, pdiskKey.PDiskId, 0);
const TVSlotId to(pdiskKey.NodeId, pdiskKey.PDiskId, Max<ui32>());

Expand Down Expand Up @@ -273,11 +289,12 @@ namespace NKikimr::NStorage {

STLOG(PRI_NOTICE, BS_NODE, NW75, "DoRestartLocalPDisk", (PDiskId, pdiskId));

const auto [_, inserted] = PDiskRestartInFlight.emplace(pdiskId);
const auto [restartIt, inserted] = PDiskRestartInFlight.try_emplace(pdiskId, false);

if (!inserted) {
STLOG(PRI_NOTICE, BS_NODE, NW76, "Restart already in progress", (PDiskId, pdiskId));
// Restart is already in progress.
// Restart is already in progress, but we will need to make a new restart, as the configuration changed.
restartIt->second = true;
return;
}

Expand Down Expand Up @@ -324,12 +341,23 @@ namespace NKikimr::NStorage {
continue;
}

const NKikimrBlobStorage::EEntityStatus entityStatus = pdisk.HasEntityStatus()
NKikimrBlobStorage::EEntityStatus entityStatus = pdisk.HasEntityStatus()
? pdisk.GetEntityStatus()
: NKikimrBlobStorage::INITIAL;

const TPDiskKey key(pdisk);

if (pdisk.HasReadOnly()) {
if (auto it = LocalPDisks.find({pdisk.GetNodeID(), pdisk.GetPDiskID()}); it != LocalPDisks.end()) {
auto& record = it->second;

if (!record.Record.HasReadOnly() || record.Record.GetReadOnly() != pdisk.GetReadOnly()) {
// Changing read-only flag requires restart.
entityStatus = NKikimrBlobStorage::RESTART;
}
}
}

switch (entityStatus) {
case NKikimrBlobStorage::RESTART:
if (auto it = LocalPDisks.find({pdisk.GetNodeID(), pdisk.GetPDiskID()}); it != LocalPDisks.end()) {
Expand Down
3 changes: 3 additions & 0 deletions ydb/core/blobstorage/pdisk/blobstorage_pdisk.h
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,9 @@ struct TEvReadLogResult : public TEventLocal<TEvReadLogResult, TEvBlobStorage::E
TLogPosition Position;
TLogPosition NextPosition;
bool IsEndOfLog;
ui32 LastGoodChunkIdx = 0;
ui64 LastGoodSectorIdx = 0;

TStatusFlags StatusFlags;
TString ErrorReason;
TOwner Owner;
Expand Down
25 changes: 20 additions & 5 deletions ydb/core/blobstorage/pdisk/blobstorage_pdisk_actor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -376,13 +376,21 @@ class TPDiskActor : public TActorBootstrapped<TPDiskActor> {
TActorId pDiskActor = std::get<2>(*params);
delete params;

TPDiskConfig *cfg = actor->Cfg.Get();

if (cfg->ReadOnly) {
TString readOnlyError = "PDisk is in read-only mode";
LOG_ERROR_S(*actorSystem, NKikimrServices::BS_PDISK, "Formatting error, " << readOnlyError);
actorSystem->Send(pDiskActor, new TEvPDiskFormattingFinished(false, readOnlyError));
return nullptr;
}

NPDisk::TKey chunkKey;
NPDisk::TKey logKey;
NPDisk::TKey sysLogKey;
EntropyPool().Read(&chunkKey, sizeof(NKikimr::NPDisk::TKey));
EntropyPool().Read(&logKey, sizeof(NKikimr::NPDisk::TKey));
EntropyPool().Read(&sysLogKey, sizeof(NKikimr::NPDisk::TKey));
TPDiskConfig *cfg = actor->Cfg.Get();

try {
try {
Expand Down Expand Up @@ -448,6 +456,13 @@ class TPDiskActor : public TActorBootstrapped<TPDiskActor> {
NActors::TActorSystem* actorSystem = std::get<3>(*params);
TActorId pdiskActor = std::get<4>(*params);

if (cfg->ReadOnly) {
TString readOnlyError = "PDisk is in read-only mode";
LOG_ERROR_S(*actorSystem, NKikimrServices::BS_PDISK, "Formatting error, " << readOnlyError);
actorSystem->Send(pdiskActor, new TEvPDiskFormattingFinished(false, readOnlyError));
return nullptr;
}

THolder<NPDisk::TPDisk> pDisk(new NPDisk::TPDisk(cfg, counters));

pDisk->Initialize(actorSystem, TActorId());
Expand Down Expand Up @@ -1004,7 +1019,7 @@ class TPDiskActor : public TActorBootstrapped<TPDiskActor> {
}

Send(ev->Sender, new TEvBlobStorage::TEvNotifyWardenPDiskRestarted(PDisk->PDiskId, NKikimrProto::EReplyStatus::NOTREADY));

return;
}

Expand All @@ -1017,7 +1032,7 @@ class TPDiskActor : public TActorBootstrapped<TPDiskActor> {
NPDisk::TMainKey newMainKey = ev->Get()->MainKey;

SecureWipeBuffer((ui8*)ev->Get()->MainKey.Keys.data(), sizeof(NPDisk::TKey) * ev->Get()->MainKey.Keys.size());

LOG_NOTICE_S(*TlsActivationContext, NKikimrServices::BS_PDISK, "PDiskId# " << PDisk->PDiskId
<< " Going to restart PDisk since received TEvAskWardenRestartPDiskResult");

Expand All @@ -1031,7 +1046,7 @@ class TPDiskActor : public TActorBootstrapped<TPDiskActor> {
TIntrusivePtr<TPDiskConfig> actorCfg = std::move(Cfg);

auto& newCfg = ev->Get()->Config;

if (newCfg) {
Y_VERIFY_S(newCfg->PDiskId == pdiskId,
"New config's PDiskId# " << newCfg->PDiskId << " is not equal to real PDiskId# " << pdiskId);
Expand All @@ -1046,7 +1061,7 @@ class TPDiskActor : public TActorBootstrapped<TPDiskActor> {
TGenericExecutorThread& executorThread = actorCtx.ExecutorThread;

PassAway();

CreatePDiskActor(executorThread, counters, actorCfg, newMainKey, pdiskId, poolId, nodeId);

Send(ev->Sender, new TEvBlobStorage::TEvNotifyWardenPDiskRestarted(pdiskId));
Expand Down
4 changes: 2 additions & 2 deletions ydb/core/blobstorage/pdisk/blobstorage_pdisk_blockdevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,9 @@ class TPDisk;

IBlockDevice* CreateRealBlockDevice(const TString &path, ui32 pDiskId, TPDiskMon &mon,
ui64 reorderingCycles, ui64 seekCostNs, ui64 deviceInFlight, TDeviceMode::TFlags flags,
ui32 maxQueuedCompletionActions, TIntrusivePtr<TSectorMap> sectorMap, TPDisk * const pdisk = nullptr);
ui32 maxQueuedCompletionActions, TIntrusivePtr<TSectorMap> sectorMap, TPDisk * const pdisk = nullptr, bool readOnly = false);
IBlockDevice* CreateRealBlockDeviceWithDefaults(const TString &path, TPDiskMon &mon, TDeviceMode::TFlags flags,
TIntrusivePtr<TSectorMap> sectorMap, TActorSystem *actorSystem, TPDisk * const pdisk = nullptr);
TIntrusivePtr<TSectorMap> sectorMap, TActorSystem *actorSystem, TPDisk * const pdisk = nullptr, bool readOnly = false);

} // NPDisk
} // NKikimr
19 changes: 12 additions & 7 deletions ydb/core/blobstorage/pdisk/blobstorage_pdisk_blockdevice_async.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -773,6 +773,7 @@ class TRealBlockDevice : public IBlockDevice {
TFlightControl FlightControl;
TAtomicBlockCounter QuitCounter;
TString LastWarning;
bool ReadOnly;
TDeque<IAsyncIoOperation*> Trash;
TMutex TrashMutex;

Expand All @@ -781,7 +782,7 @@ class TRealBlockDevice : public IBlockDevice {
public:
TRealBlockDevice(const TString &path, ui32 pDiskId, TPDiskMon &mon, ui64 reorderingCycles,
ui64 seekCostNs, ui64 deviceInFlight, TDeviceMode::TFlags flags, ui32 maxQueuedCompletionActions,
TIntrusivePtr<TSectorMap> sectorMap)
TIntrusivePtr<TSectorMap> sectorMap, bool readOnly)
: Mon(mon)
, ActorSystem(nullptr)
, Path(path)
Expand All @@ -803,6 +804,7 @@ class TRealBlockDevice : public IBlockDevice {
, DeviceInFlight(FastClp2(deviceInFlight))
, FlightControl(CountTrailingZeroBits(DeviceInFlight))
, LastWarning(IsPowerOf2(deviceInFlight) ? "" : "Device inflight must be a power of 2")
, ReadOnly(readOnly)
{
if (sectorMap) {
DriveData = TDriveData();
Expand Down Expand Up @@ -980,6 +982,7 @@ class TRealBlockDevice : public IBlockDevice {
}

void TrimSync(ui32 size, ui64 offset) override {
Y_ABORT_UNLESS(!ReadOnly);
IAsyncIoOperation* op = IoContext->CreateAsyncIoOperation(nullptr, {}, nullptr);
IoContext->PreparePTrim(op, size, offset);
IsTrimEnabled = IoContext->DoTrim(op);
Expand All @@ -1006,6 +1009,7 @@ class TRealBlockDevice : public IBlockDevice {
void PwriteAsync(const void *data, ui64 size, ui64 offset, TCompletionAction *completionAction, TReqId reqId,
NWilson::TTraceId *traceId) override {
Y_ABORT_UNLESS(completionAction);
Y_ABORT_UNLESS(!ReadOnly);
if (!IsInitialized) {
completionAction->Release(ActorSystem);
return;
Expand All @@ -1022,6 +1026,7 @@ class TRealBlockDevice : public IBlockDevice {

void FlushAsync(TCompletionAction *completionAction, TReqId reqId) override {
Y_ABORT_UNLESS(completionAction);
Y_ABORT_UNLESS(!ReadOnly);
if (!IsInitialized) {
completionAction->Release(ActorSystem);
return;
Expand Down Expand Up @@ -1301,9 +1306,9 @@ class TCachedBlockDevice : public TRealBlockDevice {
public:
TCachedBlockDevice(const TString &path, ui32 pDiskId, TPDiskMon &mon, ui64 reorderingCycles,
ui64 seekCostNs, ui64 deviceInFlight, TDeviceMode::TFlags flags, ui32 maxQueuedCompletionActions,
TIntrusivePtr<TSectorMap> sectorMap, TPDisk * const pdisk)
TIntrusivePtr<TSectorMap> sectorMap, TPDisk * const pdisk, bool readOnly)
: TRealBlockDevice(path, pDiskId, mon, reorderingCycles, seekCostNs, deviceInFlight, flags,
maxQueuedCompletionActions, sectorMap)
maxQueuedCompletionActions, sectorMap, readOnly)
, ReadsInFly(0), PDisk(pdisk)
{}

Expand Down Expand Up @@ -1441,14 +1446,14 @@ class TCachedBlockDevice : public TRealBlockDevice {

IBlockDevice* CreateRealBlockDevice(const TString &path, ui32 pDiskId, TPDiskMon &mon, ui64 reorderingCycles,
ui64 seekCostNs, ui64 deviceInFlight, TDeviceMode::TFlags flags, ui32 maxQueuedCompletionActions,
TIntrusivePtr<TSectorMap> sectorMap, TPDisk * const pdisk) {
TIntrusivePtr<TSectorMap> sectorMap, TPDisk * const pdisk, bool readOnly) {
return new TCachedBlockDevice(path, pDiskId, mon, reorderingCycles, seekCostNs, deviceInFlight, flags,
maxQueuedCompletionActions, sectorMap, pdisk);
maxQueuedCompletionActions, sectorMap, pdisk, readOnly);
}

IBlockDevice* CreateRealBlockDeviceWithDefaults(const TString &path, TPDiskMon &mon, TDeviceMode::TFlags flags,
TIntrusivePtr<TSectorMap> sectorMap, TActorSystem *actorSystem, TPDisk * const pdisk) {
IBlockDevice *device = CreateRealBlockDevice(path, 0, mon, 0, 0, 4, flags, 8, sectorMap, pdisk);
TIntrusivePtr<TSectorMap> sectorMap, TActorSystem *actorSystem, TPDisk * const pdisk, bool readOnly) {
IBlockDevice *device = CreateRealBlockDevice(path, 0, mon, 0, 0, 4, flags, 8, sectorMap, pdisk, readOnly);
device->Initialize(actorSystem, {});
return device;
}
Expand Down
2 changes: 2 additions & 0 deletions ydb/core/blobstorage/pdisk/blobstorage_pdisk_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,8 @@ struct TPDiskConfig : public TThrRefBase {

NKikimrBlobStorage::TPDiskSpaceColor::E SpaceColorBorder = NKikimrBlobStorage::TPDiskSpaceColor::GREEN;

bool ReadOnly = false;

TPDiskConfig(ui64 pDiskGuid, ui32 pdiskId, ui64 pDiskCategory)
: TPDiskConfig({}, pDiskGuid, pdiskId, pDiskCategory)
{}
Expand Down
Loading

0 comments on commit 3e43a71

Please sign in to comment.