Skip to content

Commit

Permalink
process follower updates in chunks (#13023)
Browse files Browse the repository at this point in the history
  • Loading branch information
vporyadke authored Dec 27, 2024
1 parent 8c80706 commit df406e3
Show file tree
Hide file tree
Showing 6 changed files with 162 additions and 52 deletions.
40 changes: 40 additions & 0 deletions ydb/core/mind/hive/hive.h
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,46 @@ struct TNodeFilter {
bool IsAllowedDataCenter(TDataCenterId dc) const;
};

struct TFollowerUpdates {
enum class EAction {
Create,
Update,
Delete,
};

struct TUpdate {
EAction Action;
TFullTabletId TabletId;
TFollowerGroupId GroupId;
TDataCenterId DataCenter;
};

std::deque<TUpdate> Updates;

bool Empty() const {
return Updates.empty();
}

void Create(TFullTabletId leaderTablet, TFollowerGroupId group, TDataCenterId dc) {
Updates.emplace_back(EAction::Create, leaderTablet, group, dc);
}

void Update(TFullTabletId tablet, TDataCenterId dc) {
Updates.emplace_back(EAction::Update, tablet, 0, dc);
}

void Delete(TFullTabletId tablet, TFollowerGroupId group, TDataCenterId dc) {
Updates.emplace_back(EAction::Delete, tablet, group, dc);
}

TUpdate Pop() {
TUpdate update = Updates.front();
Updates.pop_front();
return update;
}
};


} // NHive
} // NKikimr

Expand Down
4 changes: 4 additions & 0 deletions ydb/core/mind/hive/hive_events.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ struct TEvPrivate {
EvUpdateDataCenterFollowers,
EvGenerateTestData,
EvRefreshScaleRecommendation,
EvUpdateFollowers,
EvEnd
};

Expand Down Expand Up @@ -133,6 +134,9 @@ struct TEvPrivate {
struct TEvGenerateTestData : TEventLocal<TEvGenerateTestData, EvGenerateTestData> {};

struct TEvRefreshScaleRecommendation : TEventLocal<TEvRefreshScaleRecommendation, EvRefreshScaleRecommendation> {};

struct TEvUpdateFollowers : TEventLocal<TEvUpdateFollowers, EvUpdateFollowers> {
};
};

} // NHive
Expand Down
37 changes: 36 additions & 1 deletion ydb/core/mind/hive/hive_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3072,6 +3072,7 @@ void THive::ProcessEvent(std::unique_ptr<IEventHandle> event) {
hFunc(TEvPrivate::TEvGenerateTestData, Handle);
hFunc(TEvPrivate::TEvRefreshScaleRecommendation, Handle);
hFunc(TEvHive::TEvConfigureScaleRecommender, Handle);
hFunc(TEvPrivate::TEvUpdateFollowers, Handle);
}
}

Expand Down Expand Up @@ -3179,6 +3180,7 @@ STFUNC(THive::StateWork) {
fFunc(TEvPrivate::TEvGenerateTestData::EventType, EnqueueIncomingEvent);
fFunc(TEvPrivate::TEvRefreshScaleRecommendation::EventType, EnqueueIncomingEvent);
fFunc(TEvHive::TEvConfigureScaleRecommender::EventType, EnqueueIncomingEvent);
fFunc(TEvPrivate::TEvUpdateFollowers::EventType, EnqueueIncomingEvent);
hFunc(TEvPrivate::TEvProcessIncomingEvent, Handle);
default:
if (!HandleDefaultEvents(ev, SelfId())) {
Expand Down Expand Up @@ -3480,7 +3482,40 @@ void THive::Handle(TEvHive::TEvRequestTabletDistribution::TPtr& ev) {
}

void THive::Handle(TEvPrivate::TEvUpdateDataCenterFollowers::TPtr& ev) {
Execute(CreateUpdateDcFollowers(ev->Get()->DataCenter));
auto dataCenterId = ev->Get()->DataCenter;
auto& dataCenter = DataCenters[dataCenterId];
if (!dataCenter.UpdateScheduled) {
return;
}
dataCenter.UpdateScheduled = false;
if (dataCenter.IsRegistered()) {
for (auto& [tabletId, tablet] : Tablets) {
for (auto& group : tablet.FollowerGroups) {
auto& followers = dataCenter.Followers[{tabletId, group.Id}];
i64 neededCount = group.GetFollowerCountForDataCenter(dataCenterId);
i64 delta = neededCount - std::ssize(followers);
for (i64 i = 0; i < delta; ++i) {
BLOG_TRACE("UpdateDataCenterFollowers: Pending create follower for " << tabletId);
PendingFollowerUpdates.Create(tablet.GetFullTabletId(), group.Id, dataCenterId);
}
}
}
} else {
for (auto& [group, followers] : dataCenter.Followers) {
for (auto follower : followers) {
BLOG_TRACE("UpdateDataCenterFollowers: Pending delete follower for " << follower->GetFullTabletId());
PendingFollowerUpdates.Delete(follower->GetFullTabletId(), group.second, dataCenterId);
}
}
}
if (!PendingFollowerUpdates.Empty() && !ProcessFollowerUpdatesScheduled) {
Send(SelfId(), new TEvPrivate::TEvUpdateFollowers);
ProcessFollowerUpdatesScheduled = true;
}
}

void THive::Handle(TEvPrivate::TEvUpdateFollowers::TPtr&) {
Execute(CreateProcessUpdateFollowers());
}

void THive::MakeScaleRecommendation() {
Expand Down
7 changes: 5 additions & 2 deletions ydb/core/mind/hive/hive_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ class THive : public TActor<THive>, public TTabletExecutedFlat, public THiveShar
friend class TTxUpdateTabletGroups;
friend class TTxMonEvent_TabletAvailability;
friend class TLoggedMonTransaction;
friend class TTxUpdateDcFollowers;
friend class TTxProcessUpdateFollowers;

friend class TDeleteTabletActor;

Expand Down Expand Up @@ -303,7 +303,7 @@ class THive : public TActor<THive>, public TTabletExecutedFlat, public THiveShar
ITransaction* CreateRequestTabletOwners(TEvHive::TEvRequestTabletOwners::TPtr event);
ITransaction* CreateUpdateTabletsObject(TEvHive::TEvUpdateTabletsObject::TPtr event);
ITransaction* CreateUpdateDomain(TSubDomainKey subdomainKey, TEvHive::TEvUpdateDomain::TPtr event = {});
ITransaction* CreateUpdateDcFollowers(const TDataCenterId& dc);
ITransaction* CreateProcessUpdateFollowers();
ITransaction* CreateGenerateTestData(uint64_t seed);
ITransaction* CreateDeleteNode(TNodeId nodeId);
ITransaction* CreateConfigureScaleRecommender(TEvHive::TEvConfigureScaleRecommender::TPtr event);
Expand Down Expand Up @@ -405,6 +405,7 @@ class THive : public TActor<THive>, public TTabletExecutedFlat, public THiveShar
bool ProcessPendingOperationsScheduled = false;
bool LogTabletMovesScheduled = false;
bool ProcessStorageBalancerScheduled = false;
bool ProcessFollowerUpdatesScheduled = false;
TResourceRawValues TotalRawResourceValues = {};
TResourceNormalizedValues TotalNormalizedResourceValues = {};
TInstant LastResourceChangeReaction;
Expand All @@ -422,6 +423,7 @@ class THive : public TActor<THive>, public TTabletExecutedFlat, public THiveShar
std::vector<TActorId> ActorsWaitingToMoveTablets;
std::queue<TActorId> NodePingQueue;
std::unordered_set<TNodeId> NodePingsInProgress;
TFollowerUpdates PendingFollowerUpdates;

struct TPendingCreateTablet {
NKikimrHive::TEvCreateTablet CreateTablet;
Expand Down Expand Up @@ -586,6 +588,7 @@ class THive : public TActor<THive>, public TTabletExecutedFlat, public THiveShar
void Handle(TEvPrivate::TEvGenerateTestData::TPtr& ev);
void Handle(TEvPrivate::TEvRefreshScaleRecommendation::TPtr& ev);
void Handle(TEvHive::TEvConfigureScaleRecommender::TPtr& ev);
void Handle(TEvPrivate::TEvUpdateFollowers::TPtr& ev);

protected:
void RestartPipeTx(ui64 tabletId);
Expand Down
9 changes: 7 additions & 2 deletions ydb/core/mind/hive/tx__load_everything.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -674,7 +674,7 @@ class TTxLoadEverything : public TTransactionBase<THive> {
--cnt;
allowedDc = {dc};
Self->DataCenters[dc].Followers[{tabletId, groupId}].push_back(followerIt);
db.Table<Schema::TabletFollowerTablet>().Key(tabletId, followerIt->Id).Update<Schema::TabletFollowerTablet::DataCenter>(dc);
Self->PendingFollowerUpdates.Update({tabletId, followerIt->Id}, dc);
ok = true;
}
}
Expand All @@ -692,7 +692,7 @@ class TTxLoadEverything : public TTransactionBase<THive> {
}
follower->NodeFilter.AllowedDataCenters = {dcIt->first};
Self->DataCenters[dcIt->first].Followers[{tabletId, groupId}].push_back(follower);
db.Table<Schema::TabletFollowerTablet>().Key(follower->GetFullTabletId()).Update<Schema::TabletFollowerTablet::DataCenter>(dcIt->first);
Self->PendingFollowerUpdates.Update(follower->GetFullTabletId(), dcIt->first);
--dcIt->second;
}
}
Expand Down Expand Up @@ -859,6 +859,11 @@ class TTxLoadEverything : public TTransactionBase<THive> {
Self->MigrationState = NKikimrHive::EMigrationState::MIGRATION_READY;
ctx.Send(Self->SelfId(), new TEvPrivate::TEvBootTablets());

if (!Self->PendingFollowerUpdates.Empty()) {
ctx.Send(Self->SelfId(), new TEvPrivate::TEvUpdateFollowers);
Self->ProcessFollowerUpdatesScheduled = true;
}

for (auto it = Self->Nodes.begin(); it != Self->Nodes.end(); ++it) {
Self->ScheduleUnlockTabletExecution(it->second);
}
Expand Down
117 changes: 70 additions & 47 deletions ydb/core/mind/hive/tx__update_dc_followers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,64 +4,87 @@
namespace NKikimr {
namespace NHive {

class TTxUpdateDcFollowers : public TTransactionBase<THive> {
TDataCenterId DataCenterId;
class TTxProcessUpdateFollowers : public TTransactionBase<THive> {
TSideEffects SideEffects;

static constexpr size_t MAX_UPDATES_PROCESSED = 1000;
public:
TTxUpdateDcFollowers(const TDataCenterId& dataCenter, THive* hive)
TTxProcessUpdateFollowers(THive* hive)
: TBase(hive)
, DataCenterId(dataCenter)
{}

TTxType GetTxType() const override { return NHive::TXTYPE_UPDATE_DC_FOLLOWERS; }

bool Execute(TTransactionContext& txc, const TActorContext&) override {
BLOG_D("THive::TTxUpdateDcFollowers::Execute(" << DataCenterId << ")");
SideEffects.Reset(Self->SelfId());
BLOG_D("TTxProcessUpdateFollowers::Execute()");
NIceDb::TNiceDb db(txc.DB);
auto& dataCenter = Self->DataCenters[DataCenterId];
if (!dataCenter.UpdateScheduled) {
return true;
}
dataCenter.UpdateScheduled = false;
if (dataCenter.IsRegistered()) {
for (auto& [tabletId, tablet] : Self->Tablets) {
for (auto& group : tablet.FollowerGroups) {
auto& followers = dataCenter.Followers[{tabletId, group.Id}];
auto neededCount = group.GetFollowerCountForDataCenter(DataCenterId);
while (followers.size() < neededCount) {
TFollowerTabletInfo& follower = tablet.AddFollower(group);
follower.NodeFilter.AllowedDataCenters = {DataCenterId};
follower.Statistics.SetLastAliveTimestamp(TlsActivationContext->Now().MilliSeconds());
db.Table<Schema::TabletFollowerTablet>().Key(tabletId, follower.Id).Update(
NIceDb::TUpdate<Schema::TabletFollowerTablet::GroupID>(follower.FollowerGroup.Id),
NIceDb::TUpdate<Schema::TabletFollowerTablet::FollowerNode>(0),
NIceDb::TUpdate<Schema::TabletFollowerTablet::Statistics>(follower.Statistics),
NIceDb::TUpdate<Schema::TabletFollowerTablet::DataCenter>(DataCenterId));
follower.InitTabletMetrics();
follower.BecomeStopped();
follower.InitiateBoot();
followers.push_back(std::prev(tablet.Followers.end()));
BLOG_D("THive::TTxUpdateDcFollowers::Execute(" << DataCenterId << "): created follower " << follower.GetFullTabletId());
SideEffects.Reset(Self->SelfId());
for (size_t i = 0; !Self->PendingFollowerUpdates.Empty() && i < MAX_UPDATES_PROCESSED; ++i) {
auto op = Self->PendingFollowerUpdates.Pop();
TTabletInfo* tablet = Self->FindTablet(op.TabletId);
auto& dc = Self->DataCenters[op.DataCenter];
if (tablet == nullptr) {
continue;
}
switch (op.Action) {
case TFollowerUpdates::EAction::Create:
{
if (!dc.IsRegistered()) {
continue;
}
TFollowerGroup& group = tablet->AsLeader().GetFollowerGroup(op.GroupId);
auto& followers = dc.Followers[{op.TabletId.first, op.GroupId}];
if (group.GetFollowerCountForDataCenter(op.DataCenter) <= followers.size()) {
continue;
}
TFollowerTabletInfo& follower = tablet->AsLeader().AddFollower(group);
follower.NodeFilter.AllowedDataCenters = {op.DataCenter};
follower.Statistics.SetLastAliveTimestamp(TlsActivationContext->Now().MilliSeconds());
db.Table<Schema::TabletFollowerTablet>().Key(op.TabletId.first, follower.Id).Update(
NIceDb::TUpdate<Schema::TabletFollowerTablet::GroupID>(follower.FollowerGroup.Id),
NIceDb::TUpdate<Schema::TabletFollowerTablet::FollowerNode>(0),
NIceDb::TUpdate<Schema::TabletFollowerTablet::Statistics>(follower.Statistics),
NIceDb::TUpdate<Schema::TabletFollowerTablet::DataCenter>(op.DataCenter));
follower.InitTabletMetrics();
follower.BecomeStopped();
follower.InitiateBoot();
followers.push_back(std::prev(tablet->AsLeader().Followers.end()));
BLOG_D("THive::TTxProcessUpdateFollowers::Execute(): created follower " << follower.GetFullTabletId());
break;
}
}
} else {
// deleting followers
i64 deletedFollowers = 0;
for (auto& [_, followers] : dataCenter.Followers) {
for (auto follower : followers) {
db.Table<Schema::TabletFollowerTablet>().Key(follower->GetFullTabletId()).Delete();
db.Table<Schema::Metrics>().Key(follower->GetFullTabletId()).Delete();
follower->InitiateStop(SideEffects);
auto& leader = follower->GetLeader();
leader.Followers.erase(follower);
++deletedFollowers;
case TFollowerUpdates::EAction::Update:
{
// This is updated in memory in LoadEverything
bool exists = db.Table<Schema::TabletFollowerTablet>().Key(op.TabletId).Select().IsValid();
Y_ABORT_UNLESS(exists, "%s", (TStringBuilder() << "trying to update tablet " << op.TabletId).data());
db.Table<Schema::TabletFollowerTablet>().Key(op.TabletId).Update<Schema::TabletFollowerTablet::DataCenter>(op.DataCenter);
break;
}
case TFollowerUpdates::EAction::Delete:
{
if (dc.IsRegistered()) {
continue;
}
db.Table<Schema::TabletFollowerTablet>().Key(op.TabletId).Delete();
db.Table<Schema::Metrics>().Key(op.TabletId).Delete();
tablet->InitiateStop(SideEffects);
auto& followers = dc.Followers[{op.TabletId.first, op.GroupId}]; // Note: there are at most 3 followers here, see TPartitionConfigMerger
auto iter = std::find_if(followers.begin(), followers.end(), [tabletId = op.TabletId](const auto& fw) {
return fw->GetFullTabletId() == tabletId;
});
Y_ABORT_UNLESS(iter != followers.end());
auto& leader = tablet->GetLeader();
leader.Followers.erase(*iter);
followers.erase(iter);
Self->UpdateCounterTabletsTotal(-1);
break;
}
}
BLOG_D("THive::TTxUpdateDcFollowers::Execute(" << DataCenterId << "): deleted " << deletedFollowers << " followers");
Self->UpdateCounterTabletsTotal(-deletedFollowers);
dataCenter.Followers.clear();
}
if (Self->PendingFollowerUpdates.Empty()) {
Self->ProcessFollowerUpdatesScheduled = false;
} else {
SideEffects.Send(Self->SelfId(), new TEvPrivate::TEvUpdateFollowers);
}
return true;
}
Expand All @@ -71,8 +94,8 @@ class TTxUpdateDcFollowers : public TTransactionBase<THive> {
}
};

ITransaction* THive::CreateUpdateDcFollowers(const TDataCenterId& dc) {
return new TTxUpdateDcFollowers(dc, this);
ITransaction* THive::CreateProcessUpdateFollowers() {
return new TTxProcessUpdateFollowers(this);
}

} // NHive
Expand Down

0 comments on commit df406e3

Please sign in to comment.