Skip to content

Commit

Permalink
fix(healthcheck) retry locking protected fn
Browse files Browse the repository at this point in the history
When ngx.sleep API is not available (e.g. in the log phase) it's not possible to
lock using lua-resty-lock and functions that must run protected were failing.

This change adds retry methods that start new light threads that have access
to ngx.sleep and will succeed to lock.

Fix Kong/kong#5137
  • Loading branch information
locao committed Dec 13, 2019
1 parent f880430 commit 8c6a96a
Show file tree
Hide file tree
Showing 2 changed files with 125 additions and 35 deletions.
111 changes: 77 additions & 34 deletions lib/resty/healthcheck.lua
Original file line number Diff line number Diff line change
Expand Up @@ -175,50 +175,72 @@ local function fetch_target_list(self)
end


--- Run the given function holding a lock on the target list.
-- WARNING: the callback will run unprotected, so it should never
-- throw an error, but always return `nil + error` instead.
-- @param self The checker object
-- @param fn The function to execute
-- @return The results of the function; or nil and an error message
-- in case it fails locking.
local function locking_target_list(self, fn)
--- Helper function to run the function holding a lock on the target list.
-- @see locking_target_list
local function run_fn_locked_target_list(premature, self, fn)

if premature then
return
end

local lock, lock_err = resty_lock:new(self.shm_name, {
exptime = 10, -- timeout after which lock is released anyway
timeout = 5, -- max wait time to acquire lock
})
exptime = 10, -- timeout after which lock is released anyway
timeout = 5, -- max wait time to acquire lock
})

if not lock then
return nil, "failed to create lock:" .. lock_err
end

local ok, err = lock:lock(self.TARGET_LIST_LOCK)
if not ok then
return nil, "failed to acquire lock: " .. err
local pok, perr = pcall(resty_lock.lock, lock, self.TARGET_LIST_LOCK)
if not pok then
self:log(DEBUG, "failed to acquire lock: ", perr)
return nil, "failed to acquire lock"
end

local target_list
target_list, err = fetch_target_list(self)
local target_list, err = fetch_target_list(self)

local final_ok, final_err

if target_list then
final_ok, final_err = fn(target_list)
final_ok, final_err = pcall(fn, target_list)
else
final_ok, final_err = nil, err
end

local ok
ok, err = lock:unlock()
if not ok then
-- recoverable: not returning this error, only logging it
self:log(ERR, "failed to release lock '", self.TARGET_LIST_LOCK,
"': ", err)
"': ", err)
end

return final_ok, final_err
end


--- Run the given function holding a lock on the target list.
-- @param self The checker object
-- @param fn The function to execute
-- @return The results of the function; or nil and an error message
-- in case it fails locking.
local function locking_target_list(self, fn)

local ok, err = run_fn_locked_target_list(false, self, fn)
if err == "failed to acquire lock" then
local _, terr = ngx.timer.at(0, run_fn_locked_target_list, self, fn)
if terr ~= nil then
return nil, terr
end

return true
end

return ok, err
end


--- Get a target
local function get_target(self, ip, port, hostname)
hostname = hostname or ip
Expand Down Expand Up @@ -416,17 +438,13 @@ end
------------------------------------------------------------------------------


-- Run the given function holding a lock on the target.
-- WARNING: the callback will run unprotected, so it should never
-- throw an error, but always return `nil + error` instead.
-- @param self The checker object
-- @param ip Target IP
-- @param port Target port
-- @param hostname Target hostname
-- @param fn The function to execute
-- @return The results of the function; or nil and an error message
-- in case it fails locking.
local function locking_target(self, ip, port, hostname, fn)
--- Helper function to actually run the function holding a lock on the target.
-- @see locking_target
local function run_mutexed_fn(premature, self, ip, port, hostname, fn)
if premature then
return
end

local lock, lock_err = resty_lock:new(self.shm_name, {
exptime = 10, -- timeout after which lock is released anyway
timeout = 5, -- max wait time to acquire lock
Expand All @@ -436,20 +454,45 @@ local function locking_target(self, ip, port, hostname, fn)
end
local lock_key = key_for(self.TARGET_LOCK, ip, port, hostname)

local ok, err = lock:lock(lock_key)
if not ok then
return nil, "failed to acquire lock: " .. err
local pok, perr = pcall(resty_lock.lock, lock, lock_key)
if not pok then
self:log(DEBUG, "failed to acquire lock: ", perr)
return nil, "failed to acquire lock"
end

local final_ok, final_err = fn()
local final_ok, final_err = pcall(fn)

ok, err = lock:unlock()
local ok, err = lock:unlock()
if not ok then
-- recoverable: not returning this error, only logging it
self:log(ERR, "failed to release lock '", lock_key, "': ", err)
end

return final_ok, final_err

end


-- Run the given function holding a lock on the target.
-- @param self The checker object
-- @param ip Target IP
-- @param port Target port
-- @param hostname Target hostname
-- @param fn The function to execute
-- @return The results of the function; or true in case it fails locking and
-- will retry asynchronously; or nil+err in case it fails to retry.
local function locking_target(self, ip, port, hostname, fn)
local ok, err = run_mutexed_fn(false, self, ip, port, hostname, fn)
if err == "failed to acquire lock" then
local _, terr = ngx.timer.at(0, run_mutexed_fn, self, ip, port, hostname, fn)
if terr ~= nil then
return nil, terr
end

return true
end

return ok, err
end


Expand Down
49 changes: 48 additions & 1 deletion t/06-report_http_status.t
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use Cwd qw(cwd);

workers(1);

plan tests => repeat_each() * 50;
plan tests => repeat_each() * 53;

my $pwd = cwd();

Expand Down Expand Up @@ -444,3 +444,50 @@ checking unhealthy targets: nothing to do
--- no_error_log
unhealthy HTTP increment
event: target status '(127.0.0.1:2119)' from 'true' to 'false'


=== TEST 5: report_http_status() must work in log phase
--- http_config eval
qq{
$::HttpConfig
}
--- config
location = /t {
content_by_lua_block {
ngx.say("OK")
}
log_by_lua_block {
local we = require "resty.worker.events"
assert(we.configure{ shm = "my_worker_events", interval = 0.1 })
local healthcheck = require("resty.healthcheck")
local checker = healthcheck.new({
name = "testing",
shm_name = "test_shm",
type = "http",
checks = {
passive = {
healthy = {
successes = 3,
},
unhealthy = {
tcp_failures = 2,
http_failures = 3,
}
}
}
})
local ok, err = checker:add_target("127.0.0.1", 2119, nil, true)
checker:report_http_status("127.0.0.1", 2119, nil, 500, "passive")
checker:report_http_status("127.0.0.1", 2119, nil, 500, "passive")
checker:report_http_status("127.0.0.1", 2119, nil, 500, "passive")
checker:report_http_status("127.0.0.1", 2119, nil, 500, "passive")
checker:report_http_status("127.0.0.1", 2119, nil, 500, "passive")
checker:report_http_status("127.0.0.1", 2119, nil, 500, "passive")
}
}
--- request
GET /t
--- response_body
OK
--- no_error_log
failed to acquire lock: API disabled in the context of log_by_lua

0 comments on commit 8c6a96a

Please sign in to comment.