Skip to content

Commit

Permalink
Turn on cluster update merging by default
Browse files Browse the repository at this point in the history
We've been using this in production for over 3 months now and it's
been very useful to prevent CPU spikes when we get a stream of
updates.

This enables update merging every 1s.

Fixes envoyproxy#4018.

Signed-off-by: Raul Gutierrez Segales <[email protected]>
  • Loading branch information
Raul Gutierrez Segales committed Nov 10, 2018
1 parent 02b2e79 commit a1dba97
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 12 deletions.
3 changes: 3 additions & 0 deletions api/envoy/api/v2/cds.proto
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,9 @@ message Cluster {
// or metadata updates. By default, this is not configured and updates apply immediately. Also,
// the first set of updates to be seen apply immediately as well (e.g.: a new cluster).
//
// If this is not set, we default to a merge window of 1000ms. To disable it, set the merge
// window to 0.
//
// Note: merging does not apply to cluster membership changes (e.g.: adds/removes); this is
// because merging those updates isn't currently safe. See
// https://github.com/envoyproxy/envoy/pull/3941.
Expand Down
1 change: 1 addition & 0 deletions docs/root/intro/version_history.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ Version history
* admin: :http:get:`/server_info` now responds with a JSON object instead of a single string.
* circuit-breaker: added cx_open, rq_pending_open, rq_open and rq_retry_open gauges to expose live
state via :ref:`circuit breakers statistics <config_cluster_manager_cluster_stats_circuit_breakers>`.
* cluster: set a default of 1s for :ref:`option <envoy_api_field_Cluster.CommonLbConfig.update_merge_window>`.
* config: removed support for the v1 API.
* config: added support for :ref:`rate limiting<envoy_api_msg_core.RateLimitSettings>` discovery request calls.
* cors: added :ref: `invalid/valid stats <cors-statistics>` to filter.
Expand Down
21 changes: 14 additions & 7 deletions source/common/upstream/cluster_manager_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,15 @@ ClusterManagerStats ClusterManagerImpl::generateStats(Stats::Scope& scope) {
POOL_GAUGE_PREFIX(scope, final_prefix))};
}

uint64_t ClusterManagerImpl::mergeTimeout(const Cluster& cluster) const {
if (cluster.info()->lbConfig().has_update_merge_window()) {
const auto& update_merge_window = cluster.info()->lbConfig().update_merge_window();
return DurationUtil::durationToMilliseconds(update_merge_window);
}

return 1000;
}

void ClusterManagerImpl::onClusterInit(Cluster& cluster) {
// This routine is called when a cluster has finished initializing. The cluster has not yet
// been setup for cross-thread updates to avoid needless updates during initialization. The order
Expand Down Expand Up @@ -347,14 +356,14 @@ void ClusterManagerImpl::onClusterInit(Cluster& cluster) {
//
// See https://github.com/envoyproxy/envoy/pull/3941 for more context.
bool scheduled = false;
const bool merging_enabled = cluster.info()->lbConfig().has_update_merge_window();
const auto merge_timeout = mergeTimeout(cluster);
// Remember: we only merge updates with no adds/removes — just hc/weight/metadata changes.
const bool is_mergeable = !hosts_added.size() && !hosts_removed.size();

if (merging_enabled) {
if (merge_timeout > 0) {
// If this is not mergeable, we should cancel any scheduled updates since
// we'll deliver it immediately.
scheduled = scheduleUpdate(cluster, priority, is_mergeable);
scheduled = scheduleUpdate(cluster, priority, is_mergeable, merge_timeout);
}

// If an update was not scheduled for later, deliver it immediately.
Expand All @@ -374,10 +383,8 @@ void ClusterManagerImpl::onClusterInit(Cluster& cluster) {
}
}

bool ClusterManagerImpl::scheduleUpdate(const Cluster& cluster, uint32_t priority, bool mergeable) {
const auto& update_merge_window = cluster.info()->lbConfig().update_merge_window();
const auto timeout = DurationUtil::durationToMilliseconds(update_merge_window);

bool ClusterManagerImpl::scheduleUpdate(const Cluster& cluster, uint32_t priority, bool mergeable,
const uint64_t timeout) {
// Find pending updates for this cluster.
auto& updates_by_prio = updates_map_[cluster.info()->name()];
if (!updates_by_prio) {
Expand Down
4 changes: 3 additions & 1 deletion source/common/upstream/cluster_manager_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -403,7 +403,9 @@ class ClusterManagerImpl : public ClusterManager, Logger::Loggable<Logger::Id::u
using ClusterUpdatesMap = std::unordered_map<std::string, PendingUpdatesByPriorityMapPtr>;

void applyUpdates(const Cluster& cluster, uint32_t priority, PendingUpdates& updates);
bool scheduleUpdate(const Cluster& cluster, uint32_t priority, bool mergeable);
bool scheduleUpdate(const Cluster& cluster, uint32_t priority, bool mergeable,
const uint64_t timeout);
uint64_t mergeTimeout(const Cluster& cluster) const;
void createOrUpdateThreadLocalCluster(ClusterData& cluster);
ProtobufTypes::MessagePtr dumpClusterConfigs();
static ClusterManagerStats generateStats(Stats::Scope& scope);
Expand Down
10 changes: 6 additions & 4 deletions test/common/upstream/cluster_manager_impl_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -188,14 +188,16 @@ class ClusterManagerImplTest : public testing::Test {
address: "127.0.0.1"
port_value: 11002
)EOF";
const std::string merge_window = R"EOF(
const std::string merge_window_enabled = R"EOF(
common_lb_config:
update_merge_window: 3s
)EOF";
const std::string merge_window_disabled = R"EOF(
common_lb_config:
update_merge_window: 0s
)EOF";

if (enable_merge_window) {
yaml += merge_window;
}
yaml += enable_merge_window ? merge_window_enabled : merge_window_disabled;

const auto& bootstrap = parseBootstrapFromV2Yaml(yaml);

Expand Down

0 comments on commit a1dba97

Please sign in to comment.