Skip to content

Commit

Permalink
fix(healthcheck) retry locking protected fn
Browse files Browse the repository at this point in the history
When ngx.sleep API is not available (e.g. in the log phase) it's not possible to
lock using lua-resty-lock and functions that must run protected were failing.

This change adds a retry method that starts a new light thread that has access
to ngx.sleep and will succeed to lock.

Fix Kong/kong#5137
  • Loading branch information
locao committed Nov 22, 2019
1 parent f880430 commit 0ce06a2
Show file tree
Hide file tree
Showing 2 changed files with 86 additions and 16 deletions.
53 changes: 38 additions & 15 deletions lib/resty/healthcheck.lua
Original file line number Diff line number Diff line change
Expand Up @@ -416,17 +416,13 @@ end
------------------------------------------------------------------------------


-- Run the given function holding a lock on the target.
-- WARNING: the callback will run unprotected, so it should never
-- throw an error, but always return `nil + error` instead.
-- @param self The checker object
-- @param ip Target IP
-- @param port Target port
-- @param hostname Target hostname
-- @param fn The function to execute
-- @return The results of the function; or nil and an error message
-- in case it fails locking.
local function locking_target(self, ip, port, hostname, fn)
--- Helper function to actually run the function holding a lock on the target.
-- @see locking_target
local function run_mutexed_fn(premature, self, ip, port, hostname, fn)
if premature then
return
end

local lock, lock_err = resty_lock:new(self.shm_name, {
exptime = 10, -- timeout after which lock is released anyway
timeout = 5, -- max wait time to acquire lock
Expand All @@ -436,20 +432,47 @@ local function locking_target(self, ip, port, hostname, fn)
end
local lock_key = key_for(self.TARGET_LOCK, ip, port, hostname)

local ok, err = lock:lock(lock_key)
if not ok then
return nil, "failed to acquire lock: " .. err
local pok, perr = pcall(resty_lock.lock, lock, lock_key)
if not pok then
self:log(DEBUG, "failed to acquire lock: ", perr)
return nil, "failed to acquire lock"
end

local final_ok, final_err = fn()

ok, err = lock:unlock()
local ok, err = lock:unlock()
if not ok then
-- recoverable: not returning this error, only logging it
self:log(ERR, "failed to release lock '", lock_key, "': ", err)
end

return final_ok, final_err

end


-- Run the given function holding a lock on the target.
-- WARNING: the callback will run unprotected, so it should never
-- throw an error, but always return `nil + error` instead.
-- @param self The checker object
-- @param ip Target IP
-- @param port Target port
-- @param hostname Target hostname
-- @param fn The function to execute
-- @return The results of the function; or true in case it fails locking and
-- will retry asynchronously; or nil+err in case it fails to retry.
local function locking_target(self, ip, port, hostname, fn)
local ok, err = run_mutexed_fn(false, self, ip, port, hostname, fn)
if err == "failed to acquire lock" then
local _, terr = ngx.timer.at(0, run_mutexed_fn, self, ip, port, hostname, fn)
if terr ~= nil then
return nil, terr
end

return true
end

return ok, err
end


Expand Down
49 changes: 48 additions & 1 deletion t/06-report_http_status.t
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use Cwd qw(cwd);

workers(1);

plan tests => repeat_each() * 50;
plan tests => repeat_each() * 53;

my $pwd = cwd();

Expand Down Expand Up @@ -444,3 +444,50 @@ checking unhealthy targets: nothing to do
--- no_error_log
unhealthy HTTP increment
event: target status '(127.0.0.1:2119)' from 'true' to 'false'


=== TEST 5: report_http_status() must work in log phase
--- http_config eval
qq{
$::HttpConfig
}
--- config
location = /t {
content_by_lua_block {
ngx.say("OK")
}
log_by_lua_block {
local we = require "resty.worker.events"
assert(we.configure{ shm = "my_worker_events", interval = 0.1 })
local healthcheck = require("resty.healthcheck")
local checker = healthcheck.new({
name = "testing",
shm_name = "test_shm",
type = "http",
checks = {
passive = {
healthy = {
successes = 3,
},
unhealthy = {
tcp_failures = 2,
http_failures = 3,
}
}
}
})
local ok, err = checker:add_target("127.0.0.1", 2119, nil, true)
checker:report_http_status("127.0.0.1", 2119, nil, 500, "passive")
checker:report_http_status("127.0.0.1", 2119, nil, 500, "passive")
checker:report_http_status("127.0.0.1", 2119, nil, 500, "passive")
checker:report_http_status("127.0.0.1", 2119, nil, 500, "passive")
checker:report_http_status("127.0.0.1", 2119, nil, 500, "passive")
checker:report_http_status("127.0.0.1", 2119, nil, 500, "passive")
}
}
--- request
GET /t
--- response_body
OK
--- no_error_log
failed to acquire lock: API disabled in the context of log_by_lua

0 comments on commit 0ce06a2

Please sign in to comment.