Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(healthcheck) retry locking protected fn #37

Merged
merged 1 commit into from
Dec 13, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 77 additions & 34 deletions lib/resty/healthcheck.lua
Original file line number Diff line number Diff line change
Expand Up @@ -175,50 +175,72 @@ local function fetch_target_list(self)
end


--- Run the given function holding a lock on the target list.
-- WARNING: the callback will run unprotected, so it should never
-- throw an error, but always return `nil + error` instead.
-- @param self The checker object
-- @param fn The function to execute
-- @return The results of the function; or nil and an error message
-- in case it fails locking.
local function locking_target_list(self, fn)
--- Helper function to run the function holding a lock on the target list.
-- @see locking_target_list
local function run_fn_locked_target_list(premature, self, fn)

if premature then
return
end

local lock, lock_err = resty_lock:new(self.shm_name, {
exptime = 10, -- timeout after which lock is released anyway
timeout = 5, -- max wait time to acquire lock
})
exptime = 10, -- timeout after which lock is released anyway
timeout = 5, -- max wait time to acquire lock
})

if not lock then
return nil, "failed to create lock:" .. lock_err
end

local ok, err = lock:lock(self.TARGET_LIST_LOCK)
if not ok then
return nil, "failed to acquire lock: " .. err
local pok, perr = pcall(resty_lock.lock, lock, self.TARGET_LIST_LOCK)
if not pok then
self:log(DEBUG, "failed to acquire lock: ", perr)
return nil, "failed to acquire lock"
end

local target_list
target_list, err = fetch_target_list(self)
local target_list, err = fetch_target_list(self)

local final_ok, final_err

if target_list then
final_ok, final_err = fn(target_list)
final_ok, final_err = pcall(fn, target_list)
else
final_ok, final_err = nil, err
end

local ok
ok, err = lock:unlock()
if not ok then
-- recoverable: not returning this error, only logging it
self:log(ERR, "failed to release lock '", self.TARGET_LIST_LOCK,
"': ", err)
"': ", err)
end

return final_ok, final_err
end


--- Run the given function holding a lock on the target list.
-- @param self The checker object
-- @param fn The function to execute
-- @return The results of the function; or nil and an error message
-- in case it fails locking.
local function locking_target_list(self, fn)

local ok, err = run_fn_locked_target_list(false, self, fn)
if err == "failed to acquire lock" then
local _, terr = ngx.timer.at(0, run_fn_locked_target_list, self, fn)
if terr ~= nil then
return nil, terr
end

return true
end

return ok, err
end


--- Get a target
local function get_target(self, ip, port, hostname)
hostname = hostname or ip
Expand Down Expand Up @@ -416,17 +438,13 @@ end
------------------------------------------------------------------------------


-- Run the given function holding a lock on the target.
-- WARNING: the callback will run unprotected, so it should never
-- throw an error, but always return `nil + error` instead.
-- @param self The checker object
-- @param ip Target IP
-- @param port Target port
-- @param hostname Target hostname
-- @param fn The function to execute
-- @return The results of the function; or nil and an error message
-- in case it fails locking.
local function locking_target(self, ip, port, hostname, fn)
--- Helper function to actually run the function holding a lock on the target.
-- @see locking_target
local function run_mutexed_fn(premature, self, ip, port, hostname, fn)
if premature then
return
end

local lock, lock_err = resty_lock:new(self.shm_name, {
exptime = 10, -- timeout after which lock is released anyway
timeout = 5, -- max wait time to acquire lock
Expand All @@ -436,20 +454,45 @@ local function locking_target(self, ip, port, hostname, fn)
end
local lock_key = key_for(self.TARGET_LOCK, ip, port, hostname)

local ok, err = lock:lock(lock_key)
if not ok then
return nil, "failed to acquire lock: " .. err
local pok, perr = pcall(resty_lock.lock, lock, lock_key)
if not pok then
self:log(DEBUG, "failed to acquire lock: ", perr)
return nil, "failed to acquire lock"
end

local final_ok, final_err = fn()
local final_ok, final_err = pcall(fn)

ok, err = lock:unlock()
local ok, err = lock:unlock()
if not ok then
-- recoverable: not returning this error, only logging it
self:log(ERR, "failed to release lock '", lock_key, "': ", err)
end

return final_ok, final_err

end


-- Run the given function holding a lock on the target.
-- @param self The checker object
-- @param ip Target IP
-- @param port Target port
-- @param hostname Target hostname
-- @param fn The function to execute
-- @return The results of the function; or true in case it fails locking and
-- will retry asynchronously; or nil+err in case it fails to retry.
local function locking_target(self, ip, port, hostname, fn)
local ok, err = run_mutexed_fn(false, self, ip, port, hostname, fn)
if err == "failed to acquire lock" then
local _, terr = ngx.timer.at(0, run_mutexed_fn, self, ip, port, hostname, fn)
if terr ~= nil then
return nil, terr
end

return true
end

return ok, err
end


Expand Down
49 changes: 48 additions & 1 deletion t/06-report_http_status.t
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use Cwd qw(cwd);

workers(1);

plan tests => repeat_each() * 50;
plan tests => repeat_each() * 53;

my $pwd = cwd();

Expand Down Expand Up @@ -444,3 +444,50 @@ checking unhealthy targets: nothing to do
--- no_error_log
unhealthy HTTP increment
event: target status '(127.0.0.1:2119)' from 'true' to 'false'


=== TEST 5: report_http_status() must work in log phase
--- http_config eval
qq{
$::HttpConfig
}
--- config
location = /t {
content_by_lua_block {
ngx.say("OK")
}
log_by_lua_block {
local we = require "resty.worker.events"
assert(we.configure{ shm = "my_worker_events", interval = 0.1 })
local healthcheck = require("resty.healthcheck")
local checker = healthcheck.new({
name = "testing",
shm_name = "test_shm",
type = "http",
checks = {
passive = {
healthy = {
successes = 3,
},
unhealthy = {
tcp_failures = 2,
http_failures = 3,
}
}
}
})
local ok, err = checker:add_target("127.0.0.1", 2119, nil, true)
checker:report_http_status("127.0.0.1", 2119, nil, 500, "passive")
checker:report_http_status("127.0.0.1", 2119, nil, 500, "passive")
checker:report_http_status("127.0.0.1", 2119, nil, 500, "passive")
checker:report_http_status("127.0.0.1", 2119, nil, 500, "passive")
checker:report_http_status("127.0.0.1", 2119, nil, 500, "passive")
checker:report_http_status("127.0.0.1", 2119, nil, 500, "passive")
}
}
--- request
GET /t
--- response_body
OK
--- no_error_log
failed to acquire lock: API disabled in the context of log_by_lua