-
Notifications
You must be signed in to change notification settings - Fork 1.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add request timeout for ValidatorSites: #2902
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,15 +26,15 @@ | |
#include <ripple/basics/Slice.h> | ||
#include <ripple/json/json_reader.h> | ||
#include <ripple/protocol/JsonFields.h> | ||
#include <boost/algorithm/clamp.hpp> | ||
#include <boost/regex.hpp> | ||
#include <algorithm> | ||
|
||
namespace ripple { | ||
|
||
// default site query frequency - 5 minutes | ||
auto constexpr DEFAULT_REFRESH_INTERVAL = std::chrono::minutes{5}; | ||
auto constexpr ERROR_RETRY_INTERVAL = std::chrono::seconds{30}; | ||
unsigned short constexpr MAX_REDIRECTS = 3; | ||
auto constexpr default_refresh_interval = std::chrono::minutes{5}; | ||
auto constexpr error_retry_interval = std::chrono::seconds{30}; | ||
unsigned short constexpr max_redirects = 3; | ||
|
||
ValidatorSite::Site::Resource::Resource (std::string uri_) | ||
: uri {std::move(uri_)} | ||
|
@@ -82,22 +82,24 @@ ValidatorSite::Site::Site (std::string uri) | |
: loadedResource {std::make_shared<Resource>(std::move(uri))} | ||
, startingResource {loadedResource} | ||
, redirCount {0} | ||
, refreshInterval {DEFAULT_REFRESH_INTERVAL} | ||
, refreshInterval {default_refresh_interval} | ||
, nextRefresh {clock_type::now()} | ||
{ | ||
} | ||
|
||
ValidatorSite::ValidatorSite ( | ||
boost::asio::io_service& ios, | ||
ValidatorList& validators, | ||
beast::Journal j) | ||
beast::Journal j, | ||
std::chrono::seconds timeout) | ||
: ios_ (ios) | ||
, validators_ (validators) | ||
, j_ (j) | ||
, timer_ (ios_) | ||
, fetching_ (false) | ||
, pending_ (false) | ||
, stopping_ (false) | ||
, requestTimeout_ (timeout) | ||
{ | ||
} | ||
|
||
|
@@ -153,7 +155,7 @@ ValidatorSite::start () | |
{ | ||
std::lock_guard <std::mutex> lock{state_mutex_}; | ||
if (timer_.expires_at() == clock_type::time_point{}) | ||
setTimer (); | ||
setTimer (lock); | ||
} | ||
|
||
void | ||
|
@@ -168,20 +170,29 @@ ValidatorSite::stop() | |
{ | ||
std::unique_lock<std::mutex> lock{state_mutex_}; | ||
stopping_ = true; | ||
cv_.wait(lock, [&]{ return ! fetching_; }); | ||
|
||
// work::cancel() must be called before the | ||
// cv wait in order to kick any asio async operations | ||
// that might be pending. | ||
if(auto sp = work_.lock()) | ||
sp->cancel(); | ||
cv_.wait(lock, [&]{ return ! fetching_; }); | ||
|
||
error_code ec; | ||
timer_.cancel(ec); | ||
// docs indicate cancel() can throw, but this should be | ||
// reconsidered if it changes to noexcept | ||
try | ||
{ | ||
timer_.cancel(); | ||
} | ||
catch (boost::system::system_error const&) | ||
{ | ||
} | ||
stopping_ = false; | ||
pending_ = false; | ||
cv_.notify_all(); | ||
} | ||
|
||
void | ||
ValidatorSite::setTimer () | ||
ValidatorSite::setTimer (std::lock_guard<std::mutex>& state_lock) | ||
{ | ||
std::lock_guard <std::mutex> lock{sites_mutex_}; | ||
|
||
|
@@ -196,31 +207,54 @@ ValidatorSite::setTimer () | |
pending_ = next->nextRefresh <= clock_type::now(); | ||
cv_.notify_all(); | ||
timer_.expires_at (next->nextRefresh); | ||
timer_.async_wait (std::bind (&ValidatorSite::onTimer, this, | ||
std::distance (sites_.begin (), next), std::placeholders::_1)); | ||
auto idx = std::distance (sites_.begin (), next); | ||
timer_.async_wait ([this, idx] (boost::system::error_code const& ec) | ||
{ | ||
this->onTimer (idx, ec); | ||
}); | ||
} | ||
} | ||
|
||
void | ||
ValidatorSite::makeRequest ( | ||
std::shared_ptr<Site::Resource> resource, | ||
std::size_t siteIdx, | ||
std::lock_guard<std::mutex>& lock) | ||
std::lock_guard<std::mutex>& sites_lock) | ||
{ | ||
fetching_ = true; | ||
sites_[siteIdx].activeResource = resource; | ||
std::shared_ptr<detail::Work> sp; | ||
auto timeoutCancel = | ||
[this] () | ||
{ | ||
std::lock_guard <std::mutex> lock_state{state_mutex_}; | ||
// docs indicate cancel_one() can throw, but this | ||
// should be reconsidered if it changes to noexcept | ||
try | ||
{ | ||
timer_.cancel_one(); | ||
} | ||
catch (boost::system::system_error const&) | ||
{ | ||
} | ||
}; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This overload of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. fixed |
||
auto onFetch = | ||
[this, siteIdx] (error_code const& err, detail::response_type&& resp) | ||
[this, siteIdx, timeoutCancel] ( | ||
error_code const& err, detail::response_type&& resp) | ||
{ | ||
timeoutCancel (); | ||
onSiteFetch (err, std::move(resp), siteIdx); | ||
}; | ||
|
||
auto onFetchFile = | ||
[this, siteIdx] (error_code const& err, std::string const& resp) | ||
{ | ||
onTextFetch (err, resp, siteIdx); | ||
}; | ||
[this, siteIdx, timeoutCancel] ( | ||
error_code const& err, std::string const& resp) | ||
{ | ||
timeoutCancel (); | ||
onTextFetch (err, resp, siteIdx); | ||
}; | ||
|
||
JLOG (j_.debug()) << "Starting request for " << resource->uri; | ||
|
||
if (resource->pUrl.scheme == "https") | ||
{ | ||
|
@@ -252,6 +286,34 @@ ValidatorSite::makeRequest ( | |
|
||
work_ = sp; | ||
sp->run (); | ||
// start a timer for the request, which shouldn't take more | ||
// than requestTimeout_ to complete | ||
std::lock_guard <std::mutex> lock_state{state_mutex_}; | ||
timer_.expires_after (requestTimeout_); | ||
timer_.async_wait ([this, siteIdx] (boost::system::error_code const& ec) | ||
{ | ||
this->onRequestTimeout (siteIdx, ec); | ||
}); | ||
} | ||
|
||
void | ||
ValidatorSite::onRequestTimeout ( | ||
std::size_t siteIdx, | ||
error_code const& ec) | ||
{ | ||
if (ec) | ||
return; | ||
|
||
{ | ||
std::lock_guard <std::mutex> lock_site{sites_mutex_}; | ||
JLOG (j_.warn()) << | ||
"Request for " << sites_[siteIdx].activeResource->uri << | ||
" took too long"; | ||
} | ||
|
||
std::lock_guard<std::mutex> lock_state{state_mutex_}; | ||
if(auto sp = work_.lock()) | ||
sp->cancel(); | ||
} | ||
|
||
void | ||
|
@@ -268,14 +330,12 @@ ValidatorSite::onTimer ( | |
return; | ||
} | ||
|
||
std::lock_guard <std::mutex> lock{sites_mutex_}; | ||
sites_[siteIdx].nextRefresh = | ||
clock_type::now() + sites_[siteIdx].refreshInterval; | ||
|
||
assert(! fetching_); | ||
sites_[siteIdx].redirCount = 0; | ||
try | ||
{ | ||
std::lock_guard <std::mutex> lock{sites_mutex_}; | ||
sites_[siteIdx].nextRefresh = | ||
clock_type::now() + sites_[siteIdx].refreshInterval; | ||
sites_[siteIdx].redirCount = 0; | ||
// the WorkSSL client can throw if SSL init fails | ||
makeRequest(sites_[siteIdx].startingResource, siteIdx, lock); | ||
} | ||
|
@@ -292,7 +352,7 @@ void | |
ValidatorSite::parseJsonResponse ( | ||
std::string const& res, | ||
std::size_t siteIdx, | ||
std::lock_guard<std::mutex>& lock) | ||
std::lock_guard<std::mutex>& sites_lock) | ||
{ | ||
Json::Reader r; | ||
Json::Value body; | ||
|
@@ -370,18 +430,23 @@ ValidatorSite::parseJsonResponse ( | |
if (body.isMember ("refresh_interval") && | ||
body["refresh_interval"].isNumeric ()) | ||
{ | ||
// TODO: should we sanity check/clamp this value | ||
// to something reasonable? | ||
sites_[siteIdx].refreshInterval = | ||
std::chrono::minutes{body["refresh_interval"].asUInt ()}; | ||
using namespace std::chrono_literals; | ||
std::chrono::minutes const refresh = | ||
boost::algorithm::clamp( | ||
std::chrono::minutes {body["refresh_interval"].asUInt ()}, | ||
1min, | ||
24h); | ||
sites_[siteIdx].refreshInterval = refresh; | ||
sites_[siteIdx].nextRefresh = | ||
clock_type::now() + sites_[siteIdx].refreshInterval; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think the following code is a bit more consistent with time units and possibly easier to read:
Your choice. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 - fixed There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. suggestion... |
||
} | ||
} | ||
|
||
std::shared_ptr<ValidatorSite::Site::Resource> | ||
ValidatorSite::processRedirect ( | ||
detail::response_type& res, | ||
std::size_t siteIdx, | ||
std::lock_guard<std::mutex>& lock) | ||
std::lock_guard<std::mutex>& sites_lock) | ||
{ | ||
using namespace boost::beast::http; | ||
std::shared_ptr<Site::Resource> newLocation; | ||
|
@@ -395,7 +460,7 @@ ValidatorSite::processRedirect ( | |
throw std::runtime_error{"missing location"}; | ||
} | ||
|
||
if (sites_[siteIdx].redirCount == MAX_REDIRECTS) | ||
if (sites_[siteIdx].redirCount == max_redirects) | ||
{ | ||
JLOG (j_.warn()) << | ||
"Exceeded max redirects for validator list at " << | ||
|
@@ -435,6 +500,8 @@ ValidatorSite::onSiteFetch( | |
{ | ||
{ | ||
std::lock_guard <std::mutex> lock_sites{sites_mutex_}; | ||
JLOG (j_.debug()) << "Got completion for " | ||
<< sites_[siteIdx].activeResource->uri; | ||
auto onError = [&](std::string const& errMsg, bool retry) | ||
{ | ||
sites_[siteIdx].lastRefreshStatus.emplace( | ||
|
@@ -443,7 +510,7 @@ ValidatorSite::onSiteFetch( | |
errMsg}); | ||
if (retry) | ||
sites_[siteIdx].nextRefresh = | ||
clock_type::now() + ERROR_RETRY_INTERVAL; | ||
clock_type::now() + error_retry_interval; | ||
}; | ||
if (ec) | ||
{ | ||
|
@@ -506,7 +573,7 @@ ValidatorSite::onSiteFetch( | |
std::lock_guard <std::mutex> lock_state{state_mutex_}; | ||
fetching_ = false; | ||
if (! stopping_) | ||
setTimer (); | ||
setTimer (lock_state); | ||
cv_.notify_all(); | ||
} | ||
|
||
|
@@ -547,7 +614,7 @@ ValidatorSite::onTextFetch( | |
std::lock_guard <std::mutex> lock_state{state_mutex_}; | ||
fetching_ = false; | ||
if (! stopping_) | ||
setTimer (); | ||
setTimer (lock_state); | ||
cv_.notify_all(); | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just an observation. When I first saw this my inclination was to move the
Journal
to the end of the arguments, since that matches our usual parameter order. I see you made an exception to that expectation so you could default the timeout.After playing around with it a bit I'm comfortable with your choice. There's (currently) only one place where the timeout is explicitly passed in by the caller, and that's in a unit test.
Not asking for a change. I just thought you'd want to know that I played around with the options and agree that you made the right choice.