Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

dynamic_forward_proxy: DNS Cache circuit breaker #11028

Merged
merged 64 commits into from
Jul 1, 2020
Merged
Show file tree
Hide file tree
Changes from 58 commits
Commits
Show all changes
64 commits
Select commit Hold shift + click to select a range
6dc3f0e
init
Shikugawa Apr 30, 2020
855b379
impl
Shikugawa May 1, 2020
3eac0c4
impl stats
Shikugawa May 4, 2020
e898071
review
Shikugawa May 4, 2020
65d9b03
fix test
Shikugawa May 4, 2020
121ebd7
resource manager test
Shikugawa May 5, 2020
94ed978
add circuit breaking checker
Shikugawa May 5, 2020
d77a679
fix
Shikugawa May 7, 2020
a8d7f74
fix
Shikugawa May 13, 2020
4c0ae87
Merge branch 'master' of https://github.com/envoyproxy/envoy into dns…
Shikugawa May 19, 2020
202a1de
cleanup
Shikugawa May 19, 2020
294b2db
Merge branch 'master' of https://github.com/envoyproxy/envoy into dns…
Shikugawa May 19, 2020
0e2f691
fix
Shikugawa May 19, 2020
a8cda18
fix
Shikugawa May 27, 2020
5b8bd99
Merge branch 'master' of https://github.com/envoyproxy/envoy into dns…
Shikugawa May 27, 2020
d72d1f7
check
Shikugawa May 28, 2020
bf920fb
Merge branch 'master' of https://github.com/envoyproxy/envoy into dns…
Shikugawa May 28, 2020
5f265d6
format
Shikugawa May 28, 2020
0a26a58
docs
Shikugawa May 28, 2020
6b4d90e
fix test
Shikugawa May 28, 2020
ba9bbbf
fix
Shikugawa May 28, 2020
5093f2b
Kick CI
Shikugawa May 29, 2020
f5e4d8e
tidy
Shikugawa May 29, 2020
081e794
fix
Shikugawa Jun 2, 2020
ae613ce
Merge branch 'master' of https://github.com/envoyproxy/envoy into dns…
Shikugawa Jun 2, 2020
b3ac327
format
Shikugawa Jun 2, 2020
267929b
fix
Shikugawa Jun 2, 2020
873ec6d
tidy
Shikugawa Jun 2, 2020
83e5879
fix
Shikugawa Jun 3, 2020
298d368
fix error
Shikugawa Jun 3, 2020
208828b
fix
Shikugawa Jun 3, 2020
d16f29d
fix
Shikugawa Jun 4, 2020
5f9152b
fix
Shikugawa Jun 4, 2020
a1c679e
fix
Shikugawa Jun 5, 2020
d0e3a31
Kick CI
Shikugawa Jun 7, 2020
cb046ec
Merge branch 'master' of https://github.com/envoyproxy/envoy into dns…
Shikugawa Jun 10, 2020
be729a5
fix
Shikugawa Jun 11, 2020
63044cd
delete unused sections
Shikugawa Jun 12, 2020
9612a5f
fix
Shikugawa Jun 16, 2020
cbb4430
fix
Shikugawa Jun 16, 2020
9fde624
fix
Shikugawa Jun 17, 2020
abf34cc
fix
Shikugawa Jun 17, 2020
5b13116
fix
Shikugawa Jun 17, 2020
724d068
fix
Shikugawa Jun 17, 2020
959d2d8
fix
Shikugawa Jun 22, 2020
e386cd7
fix
Shikugawa Jun 22, 2020
04a6e8f
fix
Shikugawa Jun 23, 2020
3f26c24
Merge branch 'master' into dns-circuit-breaker
Shikugawa Jun 23, 2020
25dba94
fix
Shikugawa Jun 23, 2020
7d0462d
Merge branch 'dns-circuit-breaker' of github.com:Shikugawa/envoy into…
Shikugawa Jun 23, 2020
a77f70c
fix
Shikugawa Jun 23, 2020
d9f8ad0
Kick CI
Shikugawa Jun 23, 2020
0aecfe2
fix
Shikugawa Jun 25, 2020
0c1f771
fix
Shikugawa Jun 25, 2020
d065a10
fix
Shikugawa Jun 26, 2020
d884a5f
fix
Shikugawa Jun 26, 2020
bfd04c4
fix
Shikugawa Jun 29, 2020
f1fc7bb
fix
Shikugawa Jun 30, 2020
f9cc356
conflict
Shikugawa Jul 1, 2020
a5d361d
fix
Shikugawa Jul 1, 2020
587f3c1
Kick CI
Shikugawa Jul 1, 2020
4838e6a
Kick CI
Shikugawa Jul 1, 2020
0aec3e9
Merge branch 'master' of https://github.com/envoyproxy/envoy into dns…
Shikugawa Jul 1, 2020
05834c2
Kick CI
Shikugawa Jul 1, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,16 @@ option (udpa.annotations.file_status).package_version_status = ACTIVE;

// [#protodoc-title: Dynamic forward proxy common configuration]

// Configuration of circuit breakers for resolver.
mattklein123 marked this conversation as resolved.
Show resolved Hide resolved
message DnsCacheCircuitBreakers {
// The maximum number of pending DNS requests that Envoy will allow to the
// resolver. If not specified, the default is 1024.
google.protobuf.UInt32Value max_pending_requests = 1;
}

// Configuration for the dynamic forward proxy DNS cache. See the :ref:`architecture overview
// <arch_overview_http_dynamic_forward_proxy>` for more information.
// [#next-free-field: 7]
// [#next-free-field: 8]
message DnsCacheConfig {
option (udpa.annotations.versioning).previous_message_type =
"envoy.config.common.dynamic_forward_proxy.v2alpha.DnsCacheConfig";
Expand Down Expand Up @@ -83,4 +90,9 @@ message DnsCacheConfig {
// this is used as the cache's DNS refresh rate when DNS requests are failing. If this setting is
// not specified, the failure refresh rate defaults to the dns_refresh_rate.
config.cluster.v3.Cluster.RefreshRate dns_failure_refresh_rate = 6;

// The config of circuit breakers for resolver. It provides a configurable threshold.
Shikugawa marked this conversation as resolved.
Show resolved Hide resolved
// If `envoy.reloadable_features.enable_dns_cache_circuit_breakers` is enabled,
// Envoy will use DNS cache circuit breakers with default settings even if this value is not set.
DnsCacheCircuitBreakers dns_cache_circuit_breaker = 7;
Shikugawa marked this conversation as resolved.
Show resolved Hide resolved
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,13 @@ host when forwarding. See the example below within the configured routes.
the certificate chain. Additionally, Envoy will automatically perform SAN verification for the
resolved host name as well as specify the host name via SNI.

.. _dns_cache_circuit_breakers:

Dynamic forward proxy uses circuit breakers built into the DNS cache with the configuration
of :ref:`DNS cache circuit breakers <envoy_v3_api_field_extensions.common.dynamic_forward_proxy.v3.DnsCacheConfig.dns_cache_circuit_breaker>`. By default, this behavior is enabled by the runtime feature `envoy.reloadable_features.enable_dns_cache_circuit_breakers`.
If this runtime feature is disabled, cluster circuit breakers will be used even when setting the configuration
of :ref:`DNS cache circuit breakers <envoy_v3_api_field_extensions.common.dynamic_forward_proxy.v3.DnsCacheConfig.dns_cache_circuit_breaker>`.

.. code-block:: yaml

admin:
Expand Down Expand Up @@ -119,3 +126,14 @@ namespace.
host_added, Counter, Number of hosts that have been added to the cache.
host_removed, Counter, Number of hosts that have been removed from the cache.
num_hosts, Gauge, Number of hosts that are currently in the cache.
dns_rq_pending_overflow, Counter, Number of DNS pending request overflows.

The dynamic forward proxy DNS cache circuit breakers output statistics in the dns_cache.<dns_cache_name>.circuit_breakers*
namespace.

.. csv-table::
:header: Name, Type, Description
:widths: 1, 1, 2

rq_pending_open, Gauge, Whether the requests circuit breaker is closed (0) or open (1)
Shikugawa marked this conversation as resolved.
Show resolved Hide resolved
rq_pending_remaining, Gauge, Number of remaining requests until the circuit breaker opens
4 changes: 4 additions & 0 deletions docs/root/version_history/current.rst
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@ New Features
* config: added :ref:`version_text <config_cluster_manager_cds>` stat that reflects xDS version.
* decompressor: generic :ref:`decompressor <config_http_filters_decompressor>` filter exposed to users.
* dynamic forward proxy: added :ref:`SNI based dynamic forward proxy <config_network_filters_sni_dynamic_forward_proxy>` support.
* dynamic forward proxy: added configurable :ref:`circuit breakers <dns_cache_circuit_breakers>` for resolver on DNS cache.
This behavior can be temporarily disabled by the runtime feature `envoy.reloadable_features.enable_dns_cache_circuit_breakers`.
If this runtime feature is disabled, the upstream circuit breakers for the cluster will be used even if the :ref:`DNS Cache circuit breakers <dns_cache_circuit_breakers>` are configured.
* dynamic forward proxy: added :ref:`allow_insecure_cluster_options<envoy_v3_api_field_extensions.clusters.dynamic_forward_proxy.v3.ClusterConfig.allow_insecure_cluster_options>` to allow disabling of auto_san_validation and auto_sni.
* ext_authz filter: added :ref:`v2 deny_at_disable <envoy_api_field_config.filter.http.ext_authz.v2.ExtAuthz.deny_at_disable>`, :ref:`v3 deny_at_disable <envoy_v3_api_field_extensions.filters.http.ext_authz.v3.ExtAuthz.deny_at_disable>`. This allows to force deny for protected path while filter gets disabled, by setting this key to true.
* ext_authz filter: added API version field for both :ref:`HTTP <envoy_v3_api_field_extensions.filters.http.ext_authz.v3.ExtAuthz.transport_api_version>`
Expand Down Expand Up @@ -132,3 +135,4 @@ Deprecated
in :ref:`predicates <envoy_v3_api_field_config.route.v3.InternalRedirectPolicy.predicates>`.
* File access logger fields :ref:`format <envoy_v3_api_field_extensions.access_loggers.file.v3.FileAccessLog.format>`, :ref:`json_format <envoy_v3_api_field_extensions.access_loggers.file.v3.FileAccessLog.json_format>` and :ref:`typed_json_format <envoy_v3_api_field_extensions.access_loggers.file.v3.FileAccessLog.typed_json_format>` are deprecated in favor of :ref:`log_format <envoy_v3_api_field_extensions.access_loggers.file.v3.FileAccessLog.log_format>`.
* A warning is now logged when v2 xDS api is used. This behavior can be temporarily disabled by setting `envoy.reloadable_features.enable_deprecated_v2_api_warning` to `false`.
* Using cluster circuit breakers for DNS Cache is now deprecated in favor of :ref:`DNS cache circuit breakers <dns_cache_circuit_breakers>`. This behavior can be temporarily disabled by setting `envoy.reloadable_features.enable_dns_cache_circuit_breakers` to `false`.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions include/envoy/common/resource.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,4 +44,6 @@ class ResourceLimit {
virtual uint64_t count() const PURE;
};

// Optional, non-owning reference to a ResourceLimit. An empty value means no limit was supplied.
using ResourceLimitOptRef = absl::optional<std::reference_wrapper<ResourceLimit>>;

} // namespace Envoy
1 change: 1 addition & 0 deletions source/common/runtime/runtime_features.cc
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ constexpr const char* runtime_features[] = {
"envoy.deprecated_features.allow_deprecated_extension_names",
"envoy.reloadable_features.disallow_unbounded_access_logs",
"envoy.reloadable_features.enable_deprecated_v2_api_warning",
"envoy.reloadable_features.enable_dns_cache_circuit_breakers",
"envoy.reloadable_features.ext_authz_http_service_enable_case_sensitive_string_matcher",
"envoy.reloadable_features.fix_upgrade_response",
"envoy.reloadable_features.fixed_connection_close",
Expand Down
94 changes: 47 additions & 47 deletions source/common/upstream/resource_manager_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,53 @@
namespace Envoy {
namespace Upstream {

/**
 * A BasicResourceLimitImpl that mirrors its state into two gauges: one reporting whether
 * the circuit breaker is open and one reporting how many resources are left before it opens.
 */
struct ManagedResourceImpl : public BasicResourceLimitImpl {
  ManagedResourceImpl(uint64_t max, Runtime::Loader& runtime, const std::string& runtime_key,
                      Stats::Gauge& open_gauge, Stats::Gauge& remaining)
      : BasicResourceLimitImpl(max, runtime, runtime_key), open_gauge_(open_gauge),
        remaining_(remaining) {
    // Start the remaining gauge at the configured limit.
    remaining_.set(max);
  }

  // Upstream::Resource
  bool canCreate() override { return current_ < max(); }

  void inc() override {
    BasicResourceLimitImpl::inc();
    updateRemaining();
    // 0 while another resource may still be created, 1 once the limit is reached.
    const uint64_t open_state = BasicResourceLimitImpl::canCreate() ? 0 : 1;
    open_gauge_.set(open_state);
  }

  void decBy(uint64_t amount) override {
    BasicResourceLimitImpl::decBy(amount);
    updateRemaining();
    // 0 while another resource may still be created, 1 once the limit is reached.
    const uint64_t open_state = BasicResourceLimitImpl::canCreate() ? 0 : 1;
    open_gauge_.set(open_state);
  }

  /**
   * Recomputes the remaining gauge from scratch. The gauge is set rather than
   * incremented/decremented because, even though atomics are used, the current
   * resource count may end up greater than the supplied max.
   */
  void updateRemaining() {
    // Both operands are unsigned, so a plain subtraction (or std::max over it) would wrap
    // around whenever the count exceeds the limit; clamp to zero explicitly instead.
    const uint64_t in_use = current_;
    if (max() > in_use) {
      remaining_.set(max() - in_use);
    } else {
      remaining_.set(0);
    }
  }

  /**
   * Live circuit breaker state: 0 means the circuit breaker has not been triggered yet.
   */
  Stats::Gauge& open_gauge_;

  /**
   * How many more resources can be created before the circuit breaker opens.
   */
  Stats::Gauge& remaining_;
};

/**
* Implementation of ResourceManager.
* NOTE: This implementation makes some assumptions which favor simplicity over correctness.
Expand Down Expand Up @@ -53,53 +100,6 @@ class ResourceManagerImpl : public ResourceManager {
ResourceLimit& connectionPools() override { return connection_pools_; }

private:
/**
* Resource limit that additionally publishes its state to two gauges: open_gauge_
* (0 while canCreate() holds, 1 otherwise) and remaining_ (max() minus the current count,
* clamped at 0).
*/
struct ManagedResourceImpl : public BasicResourceLimitImpl {
ManagedResourceImpl(uint64_t max, Runtime::Loader& runtime, const std::string& runtime_key,
Stats::Gauge& open_gauge, Stats::Gauge& remaining)
: BasicResourceLimitImpl(max, runtime, runtime_key), open_gauge_(open_gauge),
remaining_(remaining) {
// Start the remaining gauge at the configured limit.
remaining_.set(max);
}

// Upstream::Resource
bool canCreate() override { return current_ < max(); }
void inc() override {
BasicResourceLimitImpl::inc();
// Refresh both gauges after every change of the count.
updateRemaining();
open_gauge_.set(BasicResourceLimitImpl::canCreate() ? 0 : 1);
}
void decBy(uint64_t amount) override {
BasicResourceLimitImpl::decBy(amount);
// Refresh both gauges after every change of the count.
updateRemaining();
open_gauge_.set(BasicResourceLimitImpl::canCreate() ? 0 : 1);
}

/**
* We set the gauge instead of incrementing and decrementing because,
* though atomics are used, it is possible for the current resource count
* to be greater than the supplied max.
*/
void updateRemaining() {
/**
* We cannot use std::max here because max() and current_ are
* unsigned and subtracting them may wrap around; compare first and
* report 0 when the count has reached or passed the limit.
*/
const uint64_t current_copy = current_;
remaining_.set(max() > current_copy ? max() - current_copy : 0);
}

/**
* A gauge to notify the live circuit breaker state. The gauge is set to 0
* to notify that the circuit breaker is not yet triggered, and to 1 once
* no further resource can be created.
*/
Stats::Gauge& open_gauge_;

/**
* The number of resources remaining before the circuit breaker opens.
*/
Stats::Gauge& remaining_;
};

class RetryBudgetImpl : public ResourceLimit {
public:
RetryBudgetImpl(absl::optional<double> budget_percent,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ ClusterFactory::createClusterWithConfig(
Stats::ScopePtr&& stats_scope) {
Extensions::Common::DynamicForwardProxy::DnsCacheManagerFactoryImpl cache_manager_factory(
context.singletonManager(), context.dispatcher(), context.tls(), context.random(),
context.stats());
context.runtime(), context.stats());
envoy::config::cluster::v3::Cluster cluster_config = cluster;
if (cluster_config.has_upstream_http_protocol_options()) {
if (!proto_config.allow_insecure_cluster_options() &&
Expand Down
18 changes: 18 additions & 0 deletions source/extensions/common/dynamic_forward_proxy/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ envoy_cc_library(
"//include/envoy/event:dispatcher_interface",
"//include/envoy/singleton:manager_interface",
"//include/envoy/thread_local:thread_local_interface",
"//include/envoy/upstream:resource_manager_interface",
"@envoy_api//envoy/extensions/common/dynamic_forward_proxy/v3:pkg_cc_proto",
],
)
Expand All @@ -37,6 +38,7 @@ envoy_cc_library(
hdrs = ["dns_cache_impl.h"],
deps = [
":dns_cache_interface",
":dns_cache_resource_manager",
"//include/envoy/network:dns_interface",
"//include/envoy/thread_local:thread_local_interface",
"//source/common/common:cleanup_lib",
Expand All @@ -46,3 +48,19 @@ envoy_cc_library(
"@envoy_api//envoy/extensions/common/dynamic_forward_proxy/v3:pkg_cc_proto",
],
)

# Library target for the DNS cache resource manager (dns_cache_resource_manager.{h,cc}).
envoy_cc_library(
name = "dns_cache_resource_manager",
srcs = ["dns_cache_resource_manager.cc"],
hdrs = ["dns_cache_resource_manager.h"],
deps = [
":dns_cache_interface",
"//include/envoy/common:resource_interface",
"//include/envoy/stats:stats_interface",
"//source/common/common:assert_lib",
"//source/common/common:basic_resource_lib",
"//source/common/runtime:runtime_lib",
"//source/common/upstream:resource_manager_lib",
"@envoy_api//envoy/extensions/common/dynamic_forward_proxy/v3:pkg_cc_proto",
],
)
38 changes: 37 additions & 1 deletion source/extensions/common/dynamic_forward_proxy/dns_cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include "envoy/extensions/common/dynamic_forward_proxy/v3/dns_cache.pb.h"
#include "envoy/singleton/manager.h"
#include "envoy/thread_local/thread_local.h"
#include "envoy/upstream/resource_manager.h"

namespace Envoy {
namespace Extensions {
Expand Down Expand Up @@ -43,6 +44,32 @@ class DnsHostInfo {

using DnsHostInfoSharedPtr = std::shared_ptr<DnsHostInfo>;

/**
* Gauges exported for the DNS cache circuit breakers:
* - rq_pending_open: whether the requests circuit breaker is closed (0) or open (1).
* - rq_pending_remaining: number of remaining requests until the circuit breaker opens.
*/
#define ALL_DNS_CACHE_CIRCUIT_BREAKERS_STATS(OPEN_GAUGE, REMAINING_GAUGE) \
OPEN_GAUGE(rq_pending_open, Accumulate) \
REMAINING_GAUGE(rq_pending_remaining, Accumulate)

/**
* Struct generated from ALL_DNS_CACHE_CIRCUIT_BREAKERS_STATS via GENERATE_GAUGE_STRUCT.
* NOTE(review): presumably one gauge member per listed stat -- confirm against the stats macros.
*/
struct DnsCacheCircuitBreakersStats {
ALL_DNS_CACHE_CIRCUIT_BREAKERS_STATS(GENERATE_GAUGE_STRUCT, GENERATE_GAUGE_STRUCT)
};

/**
 * Interface exposing the resource limits and circuit breaker stats of a DNS cache.
 */
class DnsCacheResourceManager {
public:
  virtual ~DnsCacheResourceManager() = default;

  /**
   * @return ResourceLimit& the limit applied to pending requests to DNS.
   */
  virtual ResourceLimit& pendingRequests() PURE;

  /**
   * @return DnsCacheCircuitBreakersStats& the stats for the DNS cache circuit breakers.
   */
  virtual DnsCacheCircuitBreakersStats& stats() PURE;
};

/**
* A cache of DNS hosts. Hosts will re-resolve their addresses or be automatically purged
* depending on configured policy.
Expand Down Expand Up @@ -148,6 +175,15 @@ class DnsCache {
* @return all hosts currently stored in the cache.
*/
virtual absl::flat_hash_map<std::string, DnsHostInfoSharedPtr> hosts() PURE;

/**
* Check if a DNS request is allowed given resource limits.
* @param pending_request optional pending request resource limit. If no resource limit is
* provided the internal DNS cache limit is used.
* @return RAII handle for pending request circuit breaker if the request was allowed, or
* nullptr if the applicable limit does not allow another request.
*/
virtual Upstream::ResourceAutoIncDecPtr
canCreateDnsRequest(ResourceLimitOptRef pending_request) PURE;
};

using DnsCacheSharedPtr = std::shared_ptr<DnsCache>;
Expand Down Expand Up @@ -176,7 +212,7 @@ using DnsCacheManagerSharedPtr = std::shared_ptr<DnsCacheManager>;
DnsCacheManagerSharedPtr getCacheManager(Singleton::Manager& manager,
Event::Dispatcher& main_thread_dispatcher,
ThreadLocal::SlotAllocator& tls,
Runtime::RandomGenerator& random,
Runtime::RandomGenerator& random, Runtime::Loader& loader,
Stats::Scope& root_scope);

/**
Expand Down
23 changes: 21 additions & 2 deletions source/extensions/common/dynamic_forward_proxy/dns_cache_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,14 @@ namespace DynamicForwardProxy {

DnsCacheImpl::DnsCacheImpl(
Event::Dispatcher& main_thread_dispatcher, ThreadLocal::SlotAllocator& tls,
Runtime::RandomGenerator& random, Stats::Scope& root_scope,
Runtime::RandomGenerator& random, Runtime::Loader& loader, Stats::Scope& root_scope,
const envoy::extensions::common::dynamic_forward_proxy::v3::DnsCacheConfig& config)
: main_thread_dispatcher_(main_thread_dispatcher),
dns_lookup_family_(Upstream::getDnsLookupFamilyFromEnum(config.dns_lookup_family())),
resolver_(main_thread_dispatcher.createDnsResolver({}, false)), tls_slot_(tls.allocateSlot()),
scope_(root_scope.createScope(fmt::format("dns_cache.{}.", config.name()))),
stats_{ALL_DNS_CACHE_STATS(POOL_COUNTER(*scope_), POOL_GAUGE(*scope_))},
stats_(generateDnsCacheStats(*scope_)),
resource_manager_(*scope_, loader, config.name(), config.dns_cache_circuit_breaker()),
refresh_interval_(PROTOBUF_GET_MS_OR_DEFAULT(config, dns_refresh_rate, 60000)),
failure_backoff_strategy_(
Config::Utility::prepareDnsRefreshStrategy<
Expand All @@ -46,6 +47,10 @@ DnsCacheImpl::~DnsCacheImpl() {
}
}

// Builds the DNS cache stats struct, creating every counter and gauge listed in
// ALL_DNS_CACHE_STATS within the supplied scope.
DnsCacheStats DnsCacheImpl::generateDnsCacheStats(Stats::Scope& scope) {
  return DnsCacheStats{ALL_DNS_CACHE_STATS(POOL_COUNTER(scope), POOL_GAUGE(scope))};
}

DnsCacheImpl::LoadDnsCacheEntryResult
DnsCacheImpl::loadDnsCacheEntry(absl::string_view host, uint16_t default_port,
LoadDnsCacheEntryCallbacks& callbacks) {
Expand All @@ -72,6 +77,20 @@ DnsCacheImpl::loadDnsCacheEntry(absl::string_view host, uint16_t default_port,
}
}

// Decides whether another DNS request may be started. The caller-supplied limit is used
// when present; otherwise the cache's own pending request limit applies. On success an
// RAII handle bound to the chosen limit is returned; on rejection nullptr is returned and,
// only when the cache's own limit was the one consulted, the overflow counter is bumped.
Upstream::ResourceAutoIncDecPtr
DnsCacheImpl::canCreateDnsRequest(ResourceLimitOptRef pending_requests) {
  const bool use_internal_limit = !pending_requests.has_value();
  auto& limit =
      use_internal_limit ? resource_manager_.pendingRequests() : pending_requests->get();

  if (limit.canCreate()) {
    return std::make_unique<Upstream::ResourceAutoIncDec>(limit);
  }

  // Overflow is only attributed to the DNS cache when its own limit rejected the request.
  if (use_internal_limit) {
    stats_.dns_rq_pending_overflow_.inc();
  }
  return nullptr;
}

absl::flat_hash_map<std::string, DnsHostInfoSharedPtr> DnsCacheImpl::hosts() {
absl::flat_hash_map<std::string, DnsHostInfoSharedPtr> ret;
for (const auto& host : primary_hosts_) {
Expand Down
Loading