Skip to content

Commit

Permalink
Fix dstool, yard slay and add test
Browse files Browse the repository at this point in the history
  • Loading branch information
SammyVimes committed Jan 6, 2025
1 parent e31e0d4 commit c9823a6
Show file tree
Hide file tree
Showing 6 changed files with 215 additions and 66 deletions.
37 changes: 31 additions & 6 deletions ydb/apps/dstool/lib/dstool_cmd_cluster_workload_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,25 @@ def match(x):
if not grouptool.check_fail_model(content, group.ErasureSpecies):
return False
return True

def can_act_on_pdisk(node_id, pdisk_id):
    """Decide whether an action may be applied to the PDisk (node_id, pdisk_id).

    Every group in base_config that has at least one vslot on this PDisk is
    inspected. The result is False as soon as one such group is not dynamic
    (per common.is_dynamic_group) or is rejected by grouptool.check_fail_model
    when all of its vslots located on this PDisk are marked as unavailable.
    Otherwise the result is True.
    """
    def on_target_pdisk(slot_id):
        # slot_id is indexed as (node id, pdisk id, ...); only the first two items matter here.
        return node_id == slot_id[0] and pdisk_id == slot_id[1]

    for group in base_config.Group:
        touches_pdisk = any(on_target_pdisk(common.get_vslot_id(item)) for item in group.VSlotId)
        if not touches_pdisk:
            continue

        if not common.is_dynamic_group(group.GroupId):
            return False

        # Per-vdisk availability map; vslots on the target PDisk are forced to "not available".
        content = {}
        for vslot_id in map(common.get_vslot_id, group.VSlotId):
            vslot = vslot_map[vslot_id]
            short_id = common.get_vdisk_id_short(vslot)
            content[short_id] = (
                not on_target_pdisk(vslot_id)
                and vslot.Ready
                and vdisk_status[vslot_id + common.get_vdisk_id(vslot)]
            )

        common.print_if_verbose(args, content, file=sys.stderr)
        if not grouptool.check_fail_model(content, group.ErasureSpecies):
            return False

    return True

def do_restart(node_id):
host = node_fqdn_map[node_id]
Expand Down Expand Up @@ -276,12 +295,6 @@ def do_kill_blob_depot():
vdisk_id = '[%08x:%d:%d:%d]' % (vslot.GroupId, vslot.FailRealmIdx, vslot.FailDomainIdx, vslot.VDiskIdx)
if vslot_id in vslot_readonly and not args.disable_readonly:
unreadonlies.append(('un-readonly vslot id: %s, vdisk id: %s' % (vslot_id, vdisk_id), (do_readonly, vslot, False)))
if can_act_on_vslot(*vslot_id[:2]) and args.enable_restart_pdisks:
pdisk_restarts.append(('restart pdisk node_id: %d, pdisk_id: %d' % (node_id, pdisk_id), (do_restart_pdisk, node_id, pdisk_id)))
if can_act_on_vslot(*vslot_id[:2]) and args.enable_readonly_pdisks:
make_pdisks_readonly.append(('readonly pdisk node_id: %d, pdisk_id: %d' % (node_id, pdisk_id), (do_readonly_pdisk, node_id, pdisk_id, True)))
if (node_id, pdisk_id) in pdisk_readonly and args.enable_readonly_pdisks:
make_pdisks_not_readonly.append(('un-readonly pdisk node_id: %d, pdisk_id: %d' % (node_id, pdisk_id), (do_readonly_pdisk, node_id, pdisk_id, False)))
if can_act_on_vslot(*vslot_id) and (recent_restarts or args.disable_restarts):
if not args.disable_evicts:
evicts.append(('evict vslot id: %s, vdisk id: %s' % (vslot_id, vdisk_id), (do_evict, vslot_id)))
Expand All @@ -290,6 +303,18 @@ def do_kill_blob_depot():
if not args.disable_readonly:
readonlies.append(('readonly vslot id: %s, vdisk id: %s' % (vslot_id, vdisk_id), (do_readonly, vslot, True)))

for pdisk in base_config.PDisk:
node_id, pdisk_id = pdisk.NodeId, pdisk.PDiskId

if can_act_on_pdisk(node_id, pdisk_id):
if args.enable_restart_pdisks:
pdisk_restarts.append(('restart pdisk node_id: %d, pdisk_id: %d' % (node_id, pdisk_id), (do_restart_pdisk, node_id, pdisk_id)))
if args.enable_readonly_pdisks:
make_pdisks_readonly.append(('readonly pdisk node_id: %d, pdisk_id: %d' % (node_id, pdisk_id), (do_readonly_pdisk, node_id, pdisk_id, True)))

if (node_id, pdisk_id) in pdisk_readonly and args.enable_readonly_pdisks:
make_pdisks_not_readonly.append(('un-readonly pdisk node_id: %d, pdisk_id: %d' % (node_id, pdisk_id), (do_readonly_pdisk, node_id, pdisk_id, False)))

def pick(v):
action_name, action = random.choice(v)
print(action_name)
Expand Down
17 changes: 8 additions & 9 deletions ydb/core/blobstorage/nodewarden/node_warden_pdisk.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -356,21 +356,20 @@ namespace NKikimr::NStorage {

const TPDiskKey key(pdisk);

if (pdisk.HasReadOnly()) {
if (auto it = LocalPDisks.find({pdisk.GetNodeID(), pdisk.GetPDiskID()}); it != LocalPDisks.end()) {
auto& record = it->second;
auto localPdiskIt = LocalPDisks.find({pdisk.GetNodeID(), pdisk.GetPDiskID()});
if (localPdiskIt != LocalPDisks.end()) {
auto& record = localPdiskIt->second;

if (!record.Record.HasReadOnly() || record.Record.GetReadOnly() != pdisk.GetReadOnly()) {
// Changing read-only flag requires restart.
entityStatus = NKikimrBlobStorage::RESTART;
}
if (record.Record.GetReadOnly() != pdisk.GetReadOnly()) {
// Changing read-only flag requires restart.
entityStatus = NKikimrBlobStorage::RESTART;
}
}

switch (entityStatus) {
case NKikimrBlobStorage::RESTART:
if (auto it = LocalPDisks.find({pdisk.GetNodeID(), pdisk.GetPDiskID()}); it != LocalPDisks.end()) {
it->second.Record = pdisk;
if (localPdiskIt != LocalPDisks.end()) {
localPdiskIt->second.Record = pdisk;
}
DoRestartLocalPDisk(pdisk);
[[fallthrough]];
Expand Down
5 changes: 4 additions & 1 deletion ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3809,7 +3809,10 @@ bool TPDisk::HandleReadOnlyIfWrite(TRequestBase *request) {
return true;
case ERequestType::RequestYardSlay: {
TSlay &req = *static_cast<TSlay*>(request);
ActorSystem->Send(sender, new NPDisk::TEvSlayResult(NKikimrProto::CORRUPTED, 0,
// We send NOTREADY, since BSController can't handle CORRUPTED or ERROR.
// If for some reason the disk becomes *not* read-only, the request will be retried and the VDisk will be slain.
// If not, the request will keep being retried until the disk is replaced during maintenance.
ActorSystem->Send(sender, new NPDisk::TEvSlayResult(NKikimrProto::NOTREADY, 0,
req.VDiskId, req.SlayOwnerRound, req.PDiskId, req.VSlotId, errorReason));
return true;
}
Expand Down
4 changes: 2 additions & 2 deletions ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1159,7 +1159,7 @@ Y_UNIT_TEST_SUITE(ReadOnlyPDisk) {
vdisk.ReadLog(); // Should be able to read log.
}

template <class Request, class Response, class... Args>
template <class Request, class Response, NKikimrProto::EReplyStatus ExpectedStatus = NKikimrProto::CORRUPTED, class... Args>
auto CheckReadOnlyRequest(Args&&... args) {
return [args = std::forward_as_tuple(args...)](TActorTestContext& testCtx) {
Request* req = std::apply([](auto&&... unpackedArgs) {
Expand Down Expand Up @@ -1218,7 +1218,7 @@ Y_UNIT_TEST_SUITE(ReadOnlyPDisk) {
vdisk.PDiskParams->Owner, vdisk.PDiskParams->OwnerRound + 1
),
// Should fail on slaying vdisk. (ERequestType::RequestYardSlay)
CheckReadOnlyRequest<NPDisk::TEvSlay, NPDisk::TEvSlayResult>(
CheckReadOnlyRequest<NPDisk::TEvSlay, NPDisk::TEvSlayResult, NKikimrProto::NOTREADY>(
vdisk.VDiskID, vdisk.PDiskParams->OwnerRound + 1, 0, 0
)
};
Expand Down
9 changes: 9 additions & 0 deletions ydb/core/blobstorage/pdisk/mock/pdisk_mock.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -446,8 +446,17 @@ class TPDiskMockActor : public TActorBootstrapped<TPDiskMockActor> {
void Handle(NPDisk::TEvSlay::TPtr ev) {
auto *msg = ev->Get();
PDISK_MOCK_LOG(INFO, PDM17, "received TEvSlay", (Msg, msg->ToString()));

auto res = std::make_unique<NPDisk::TEvSlayResult>(NKikimrProto::OK, GetStatusFlags(), msg->VDiskId,
msg->SlayOwnerRound, msg->PDiskId, msg->VSlotId, TString());

if (Impl.IsDiskReadOnly) {
res->Status = NKikimrProto::NOTREADY;
res->ErrorReason = "PDisk is in read-only mode";
Send(ev->Sender, res.release());
return;
}

bool found = false;
for (auto& [ownerId, owner] : Impl.Owners) {
if (!owner.VDiskId.SameExceptGeneration(msg->VDiskId)) {
Expand Down
209 changes: 161 additions & 48 deletions ydb/core/blobstorage/ut_blobstorage/read_only_pdisk.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,25 @@

Y_UNIT_TEST_SUITE(BSCReadOnlyPDisk) {

// Builds a TConfigRequest carrying a single SetPDiskReadOnly command.
//   nodeId, pdiskId - identify the target PDisk of the command.
//   readOnly        - value passed to the command's SetValue().
//   ignoreDegraded  - forwarded to SetIgnoreDegradedGroupsChecks() on the request.
NKikimrBlobStorage::TConfigRequest CreateReadOnlyRequest(ui32 nodeId, ui32 pdiskId, bool readOnly, bool ignoreDegraded = false) {
    NKikimrBlobStorage::TConfigRequest request;
    request.SetIgnoreDegradedGroupsChecks(ignoreDegraded);

    NKikimrBlobStorage::TSetPDiskReadOnly* cmd = request.AddCommand()->MutableSetPDiskReadOnly();
    cmd->SetValue(readOnly);

    auto* target = cmd->MutableTargetPDiskId();
    target->SetNodeId(nodeId);
    target->SetPDiskId(pdiskId);

    // Return the local by name: wrapping it in std::move() would disable copy elision (NRVO)
    // and the local is already treated as an rvalue in a return statement.
    return request;
}

// Builds a SetPDiskReadOnly request for the given PDisk via CreateReadOnlyRequest()
// and returns whatever env.Invoke() produces for it.
NKikimrBlobStorage::TConfigResponse SetReadOnly(TEnvironmentSetup& env, ui32 nodeId, ui32 pdiskId, bool readOnly, bool ignoreDegraded = false) {
    auto setReadOnlyRequest = CreateReadOnlyRequest(nodeId, pdiskId, readOnly, ignoreDegraded);
    return env.Invoke(setReadOnlyRequest);
}

Y_UNIT_TEST(ReadOnlyNotAllowed) {
TEnvironmentSetup env({
.NodeCount = 10,
Expand Down Expand Up @@ -34,16 +53,7 @@ Y_UNIT_TEST_SUITE(BSCReadOnlyPDisk) {
for (; it != diskGuids.end(); it++, i++) {
auto& diskId = it->first;

NKikimrBlobStorage::TConfigRequest request;
request.SetIgnoreDegradedGroupsChecks(true);

NKikimrBlobStorage::TSetPDiskReadOnly* cmd = request.AddCommand()->MutableSetPDiskReadOnly();
auto pdiskId = cmd->MutableTargetPDiskId();
cmd->SetValue(true);
pdiskId->SetNodeId(diskId.NodeId);
pdiskId->SetPDiskId(diskId.PDiskId);

auto response = env.Invoke(request);
auto response = SetReadOnly(env, diskId.NodeId, diskId.PDiskId, true, true);

if (i < 2) {
// Two disks can be set ReadOnly.
Expand Down Expand Up @@ -131,16 +141,7 @@ Y_UNIT_TEST_SUITE(BSCReadOnlyPDisk) {
}

{
NKikimrBlobStorage::TConfigRequest request;
request.SetIgnoreDegradedGroupsChecks(true);

NKikimrBlobStorage::TSetPDiskReadOnly* cmd = request.AddCommand()->MutableSetPDiskReadOnly();
auto pdiskId = cmd->MutableTargetPDiskId();
cmd->SetValue(true);
pdiskId->SetNodeId(diskId.NodeId);
pdiskId->SetPDiskId(diskId.PDiskId);

auto response = env.Invoke(request);
auto response = SetReadOnly(env, diskId.NodeId, diskId.PDiskId, true, true);

UNIT_ASSERT_C(response.GetSuccess(), response.GetErrorDescription());
}
Expand Down Expand Up @@ -204,16 +205,7 @@ Y_UNIT_TEST_SUITE(BSCReadOnlyPDisk) {
auto& diskId = it->first;

for (auto val : {true, false}) {
NKikimrBlobStorage::TConfigRequest request;
request.SetIgnoreDegradedGroupsChecks(true);

NKikimrBlobStorage::TSetPDiskReadOnly* cmd = request.AddCommand()->MutableSetPDiskReadOnly();
auto pdiskId = cmd->MutableTargetPDiskId();
cmd->SetValue(val);
pdiskId->SetNodeId(diskId.NodeId);
pdiskId->SetPDiskId(diskId.PDiskId);

Invoke(env, request);
Invoke(env, CreateReadOnlyRequest(diskId.NodeId, diskId.PDiskId, val, true));

TInstant barrier = env.Runtime->GetClock() + TDuration::Minutes(5);

Expand Down Expand Up @@ -309,15 +301,8 @@ Y_UNIT_TEST_SUITE(BSCReadOnlyPDisk) {
// Restarting the owner of an already broken disk in a broken group must be allowed
auto& [targetNodeId, targetPDiskId, unused1, unused2] = vdisks[0];

NKikimrBlobStorage::TConfigRequest request;

NKikimrBlobStorage::TSetPDiskReadOnly* cmd = request.AddCommand()->MutableSetPDiskReadOnly();
auto pdiskId = cmd->MutableTargetPDiskId();
cmd->SetValue(true);
pdiskId->SetNodeId(targetNodeId);
pdiskId->SetPDiskId(targetPDiskId);

auto response = env.Invoke(request);
auto response = SetReadOnly(env, targetNodeId, targetPDiskId, true);

UNIT_ASSERT_C(response.GetSuccess(), response.GetErrorDescription());

// Wait until pdisk restarts and node warden sends "pdisk restarted" to BSC.
Expand Down Expand Up @@ -371,17 +356,145 @@ Y_UNIT_TEST_SUITE(BSCReadOnlyPDisk) {
}

// However making the owner of a single good disk ReadOnly must be prohibited
NKikimrBlobStorage::TConfigRequest request;

NKikimrBlobStorage::TSetPDiskReadOnly* cmd = request.AddCommand()->MutableSetPDiskReadOnly();
auto pdiskId = cmd->MutableTargetPDiskId();
cmd->SetValue(true);
pdiskId->SetNodeId(targetNodeId);
pdiskId->SetPDiskId(targetPDiskId);

auto response = env.Invoke(request);
auto response = SetReadOnly(env, targetNodeId, targetPDiskId, true);

UNIT_ASSERT_C(!response.GetSuccess(), "ReadOnly should've been prohibited");
UNIT_ASSERT_STRING_CONTAINS(response.GetErrorDescription(), "Disintegrated");
}

// Scenario: a VDisk living on a read-only PDisk becomes a donor; the test checks that
// no TEvControllerNodeReport is observed while the PDisk stays read-only, and that after
// the read-only flag is cleared a report with phase DESTROYED arrives for the donor slot.
Y_UNIT_TEST(ReadOnlySlay) {
// 8 nodes, replication paused at start, block-4-2 erasure.
TEnvironmentSetup env{{
.NodeCount = 8,
.VDiskReplPausedAtStart = true,
.Erasure = TBlobStorageGroupType::Erasure4Plus2Block,
}};
auto& runtime = env.Runtime;

env.EnableDonorMode();
env.CreateBoxAndPool(2, 1);
env.CommenceReplication();
env.Sim(TDuration::Seconds(30));

// The test operates on the first group returned by the environment.
const ui32 groupId = env.GetGroups().front();

// Put 100 small blobs into the group; every put must succeed.
const TActorId edge = runtime->AllocateEdgeActor(1, __FILE__, __LINE__);
for (ui32 i = 0; i < 100; ++i) {
const TString buffer = TStringBuilder() << "blob number " << i;
TLogoBlobID id(1, 1, 1, 0, buffer.size(), 0);
runtime->WrapInActorContext(edge, [&] {
SendToBSProxy(edge, groupId, new TEvBlobStorage::TEvPut(id, buffer, TInstant::Max()));
});
auto res = env.WaitForEdgeActorEvent<TEvBlobStorage::TEvPutResult>(edge, false);
UNIT_ASSERT_VALUES_EQUAL(res->Get()->Status, NKikimrProto::OK);
}

// Wait for sync and stuff.
env.Sim(TDuration::Seconds(3));

// Move slot out the disk.
// The VDisk at index 0 of the group is the one being moved.
auto info = env.GetGroupInfo(groupId);
const TVDiskID& vdiskId = info->GetVDiskId(0);
const TActorId& vdiskActorId = info->GetActorId(0);

// Extract node id and PDisk id from the VDisk service actor id; the third component is ignored.
ui32 targetNodeId, targetPDiskId;
std::tie(targetNodeId, targetPDiskId, std::ignore) = DecomposeVDiskServiceId(vdiskActorId);

// Switch the PDisk hosting that VDisk to read-only before moving the slot away.
{
auto response = SetReadOnly(env, targetNodeId, targetPDiskId, true);

UNIT_ASSERT_C(response.GetSuccess(), "ReadOnly should've been allowed");
}

env.SettlePDisk(vdiskActorId);
env.Sim(TDuration::Seconds(30));

// Find our donor and acceptor disks.
// Exactly one slot in the base config is expected to list donors, and exactly one donor.
auto baseConfig = env.FetchBaseConfig();
bool found = false;
std::pair<ui32, ui32> donorPDiskId;
std::tuple<ui32, ui32, ui32> acceptor;
std::tuple<ui32, ui32, ui32> donorId;
for (const auto& slot : baseConfig.GetVSlot()) {
if (slot.DonorsSize()) {
UNIT_ASSERT(!found);
UNIT_ASSERT_VALUES_EQUAL(slot.DonorsSize(), 1);
const auto& donor = slot.GetDonors(0);
const auto& id = donor.GetVSlotId();
// The donor must be the very VDisk that was moved off the read-only PDisk.
UNIT_ASSERT_VALUES_EQUAL(vdiskActorId, MakeBlobStorageVDiskID(id.GetNodeId(), id.GetPDiskId(), id.GetVSlotId()));
UNIT_ASSERT_VALUES_EQUAL(VDiskIDFromVDiskID(donor.GetVDiskId()), vdiskId);
donorPDiskId = {id.GetNodeId(), id.GetPDiskId()};
donorId = {id.GetNodeId(), id.GetPDiskId(), id.GetVSlotId()};
const auto& acceptorId = slot.GetVSlotId();
acceptor = {acceptorId.GetNodeId(), acceptorId.GetPDiskId(), acceptorId.GetVSlotId()};
found = true;
}
}
UNIT_ASSERT(found);

// Restart with formatting.
// The mock state of the donor's PDisk is dropped between Cleanup() and Initialize().
env.Cleanup();
const size_t num = env.PDiskMockStates.erase(donorPDiskId);
UNIT_ASSERT_VALUES_EQUAL(num, 1);
env.Initialize();

// Wait for initialization.
env.Sim(TDuration::Seconds(30));

// Ensure donor finished its job.
// The acceptor slot must no longer list donors and must report status "REPLICATING".
baseConfig = env.FetchBaseConfig();
found = false;
for (const auto& slot : baseConfig.GetVSlot()) {
const auto& id = slot.GetVSlotId();
if (std::make_tuple(id.GetNodeId(), id.GetPDiskId(), id.GetVSlotId()) == acceptor) {
UNIT_ASSERT(!found);
UNIT_ASSERT_VALUES_EQUAL(slot.DonorsSize(), 0);
UNIT_ASSERT_VALUES_EQUAL(slot.GetStatus(), "REPLICATING");
found = true;
}
}
UNIT_ASSERT(found);

// Ensure donor was not slain yet.
// Simulate 10 minutes; any TEvControllerNodeReport seen in that window fails the test.
TInstant barrier = env.Runtime->GetClock() + TDuration::Minutes(10);
env.Runtime->Sim([&] { return env.Runtime->GetClock() <= barrier; }, [&](IEventHandle &witnessedEvent) {
switch (witnessedEvent.GetTypeRewrite()) {
case TEvBlobStorage::TEvControllerNodeReport::EventType: {
UNIT_ASSERT(false);
break;
}
}
});

// Now make disk not read-only so that it can slay donor vdisk.
{
auto response = SetReadOnly(env, targetNodeId, targetPDiskId, false);

UNIT_ASSERT_C(response.GetSuccess(), "ReadOnly should've been allowed");
}

// Ensure donor vdisk was slain.
// Simulate up to 10 minutes, stopping early once a matching node report has been seen.
barrier = env.Runtime->GetClock() + TDuration::Minutes(10);
bool gotNodeReport = false;
env.Runtime->Sim([&] { return env.Runtime->GetClock() <= barrier && (!gotNodeReport); }, [&](IEventHandle &witnessedEvent) {
switch (witnessedEvent.GetTypeRewrite()) {
case TEvBlobStorage::TEvControllerNodeReport::EventType: {
auto *nodeReport = witnessedEvent.Get<TEvBlobStorage::TEvControllerNodeReport>();
if (nodeReport) {
const auto& vdisks = nodeReport->Record.GetVDiskReports();
// Only reports that carry exactly one VDisk entry are checked; others are ignored.
if (vdisks.size() == 1) {
auto& vdisk = vdisks[0];
const auto& vslotId = vdisk.GetVSlotId();
std::tuple<ui32, ui32, ui32> vdiskId = {vslotId.GetNodeId(), vslotId.GetPDiskId(), vslotId.GetVSlotId()};
// The reported slot must be the donor slot, and its phase must be DESTROYED.
UNIT_ASSERT_VALUES_EQUAL(donorId, vdiskId);
UNIT_ASSERT_EQUAL(vdisk.GetPhase(), NKikimrBlobStorage::TEvControllerNodeReport::DESTROYED);
gotNodeReport = true;
}
}
break;
}
}
});

// Fails if the 10-minute window elapsed without a matching report.
UNIT_ASSERT(gotNodeReport);
}
}

0 comments on commit c9823a6

Please sign in to comment.