Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(healthchecker) port 2.x lock fixes to 1.5.x #113

Merged
merged 6 commits into from
Jul 6, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
248 changes: 150 additions & 98 deletions lib/resty/healthcheck.lua
Original file line number Diff line number Diff line change
Expand Up @@ -34,21 +34,24 @@ local cjson = require("cjson.safe").new()
local table_insert = table.insert
local table_remove = table.remove
local worker_events = require("resty.worker.events")
local resty_lock = require ("resty.lock")
local re_find = ngx.re.find
-- local resty_lock = require("resty.lock") -- required later in the file"
local ssl = require("ngx.ssl")
local resty_timer = require "resty.timer"
local bit = require("bit")
local re_find = ngx.re.find
local ngx_now = ngx.now
local ngx_worker_id = ngx.worker.id
local ngx_worker_pid = ngx.worker.pid
local ssl = require("ngx.ssl")
local resty_timer = require "resty.timer"
local pcall = pcall
local get_phase = ngx.get_phase
local type = type
local assert = assert

local new_tab
local nkeys
local is_array

do
local pcall = pcall
local ok

ok, new_tab = pcall(require, "table.new")
Expand Down Expand Up @@ -213,7 +216,126 @@ local function key_for(key_prefix, ip, port, hostname)
return string.format("%s:%s:%s%s", key_prefix, ip, port, hostname and ":" .. hostname or "")
end

local run_locked
do
-- resty_lock is restricted to this scope in order to keep sensitive
-- lock-handling code separate separate from all other business logic
--
-- If you need to use resty_lock in a way that is not covered by the
-- `run_locked` helper function defined below, it's strongly-advised to
-- define it fully within this scope unless you have a very good reason
--
-- (see https://github.com/Kong/lua-resty-healthcheck/pull/112)
local resty_lock = require "resty.lock"

local yieldable = {
rewrite = true,
access = true,
content = true,
timer = true,
}

local function run_in_timer(premature, fn, ...)
if premature then
return
end

return fn(...)
end

local function schedule(fn, ...)
return ngx.timer.at(0, run_in_timer, fn, ...)
end

-- timeout when yieldable
local timeout = 5

-- resty.lock consumes these options immediately, so this table can be reused
local opts = {
exptime = 10, -- timeout after which lock is released anyway
timeout = timeout, -- max wait time to acquire lock
}

---
-- Acquire a lock and run a function
--
-- The function call itself is wrapped with `pcall` to protect against
-- exception.
--
-- This function exhibits some special behavior when called during a
-- non-yieldable phase such as `init_worker` or `log`:
--
-- 1. The lock timeout is set to 0 to ensure that `resty.lock` does not
-- attempt to sleep/yield
-- 2. If acquiring the lock fails due to a timeout, `run_locked`
-- (this function) is re-scheduled to run in a timer. In this case,
-- the function returns `"scheduled"`
--
-- @param self The checker object
-- @param key the key/identifier to acquire a lock for
-- @param fn The function to execute
-- @param ... arguments that will be passed to fn
-- @return The results of the function; or nil and an error message
-- in case it fails locking.
function run_locked(self, key, fn, ...)
-- we're extra extra extra defensive in this code path
local typ = type(key)
-- XXX is a number key ever expected?
assert(typ == "string" or typ == "number",
"unexpected lock key type: " .. typ)
key = tostring(key)

-- first aqcuire a lock or conditionally re-schedule ourselves in a timer
local lock
do
local yield = yieldable[get_phase()]

if yield then
opts.timeout = timeout
else
-- if yielding is not possible in the current phase, use a zero timeout
-- so that resty.lock will return `nil, "timeout"` immediately instead of
-- calling ngx.sleep()
opts.timeout = 0
end

local err
lock, err = resty_lock:new(self.shm_name, opts)
if not lock then
return nil, "failed creating lock for '" .. key .. "', " .. err
end

local elapsed
elapsed, err = lock:lock(key)

if not elapsed and err == "timeout" and not yield then
-- yielding is not possible in the current phase, so retry in a timer
local ok, terr = schedule(run_locked, self, key, fn, ...)
if not ok then
return nil, terr
end

return "scheduled"

elseif not elapsed then
return nil, "failed acquiring lock for '" .. key .. "', " .. err
end
end

local pok, perr, res = pcall(fn, ...)

local ok, err = lock:unlock()
if not ok then
self:log(ERR, "failed unlocking '", key, "', ", err)
end

if not pok then
return nil, perr
end

return perr, res
end
end
local checker = {}


Expand All @@ -235,48 +357,15 @@ local function fetch_target_list(self)
end


--- Helper function to run the function holding a lock on the target list.
-- @see locking_target_list
local function run_fn_locked_target_list(premature, self, fn)

if premature then
return
end

local lock, lock_err = resty_lock:new(self.shm_name, {
exptime = 10, -- timeout after which lock is released anyway
timeout = 5, -- max wait time to acquire lock
})

if not lock then
return nil, "failed to create lock:" .. lock_err
end

local pok, perr = pcall(resty_lock.lock, lock, self.TARGET_LIST_LOCK)
if not pok then
self:log(DEBUG, "failed to acquire lock: ", perr)
return nil, "failed to acquire lock"
end

local target_list, err = fetch_target_list(self)

local final_ok, final_err

if target_list then
final_ok, final_err = pcall(fn, target_list)
else
final_ok, final_err = nil, err
end

local ok
ok, err = lock:unlock()
if not ok then
-- recoverable: not returning this error, only logging it
self:log(ERR, "failed to release lock '", self.TARGET_LIST_LOCK,
"': ", err)
local function with_target_list(self, fn)
local targets, err = fetch_target_list(self)
if not targets then
return nil, err
end

return final_ok, final_err
-- this is only ever called in the context of `run_locked`,
-- so no pcall needed
return fn(targets)
end


Expand All @@ -286,15 +375,10 @@ end
-- @return The results of the function; or nil and an error message
-- in case it fails locking.
local function locking_target_list(self, fn)
local ok, err = run_locked(self, self.TARGET_LIST_LOCK, with_target_list, self, fn)

local ok, err = run_fn_locked_target_list(false, self, fn)
if err == "failed to acquire lock" then
local _, terr = ngx.timer.at(0, run_fn_locked_target_list, self, fn)
if terr ~= nil then
return nil, terr
end

return true
if ok == "scheduled" then
self:log(DEBUG, "target_list function re-scheduled in timer")
end

return ok, err
Expand Down Expand Up @@ -330,15 +414,15 @@ function checker:add_target(ip, port, hostname, is_healthy, hostheader)
local internal_health = is_healthy and "healthy" or "unhealthy"

local ok, err = locking_target_list(self, function(target_list)
local found = false
local found = false

-- check whether we already have this target
for _, target in ipairs(target_list) do
if target.ip == ip and target.port == port and target.hostname == (hostname) then
if target.purge_time == nil then
self:log(DEBUG, "adding an existing target: ", hostname or "", " ", ip,
":", port, " (ignoring)")
return
return false
end
target.purge_time = nil
found = true
Expand Down Expand Up @@ -535,41 +619,6 @@ end
------------------------------------------------------------------------------


--- Helper function to actually run the function holding a lock on the target.
-- @see locking_target
local function run_mutexed_fn(premature, self, ip, port, hostname, fn)
if premature then
return
end

local lock, lock_err = resty_lock:new(self.shm_name, {
exptime = 10, -- timeout after which lock is released anyway
timeout = 5, -- max wait time to acquire lock
})
if not lock then
return nil, "failed to create lock:" .. lock_err
end
local lock_key = key_for(self.TARGET_LOCK, ip, port, hostname)

local pok, perr = pcall(resty_lock.lock, lock, lock_key)
if not pok then
self:log(DEBUG, "failed to acquire lock: ", perr)
return nil, "failed to acquire lock"
end

local final_ok, final_err = pcall(fn)

local ok, err = lock:unlock()
if not ok then
-- recoverable: not returning this error, only logging it
self:log(ERR, "failed to release lock '", lock_key, "': ", err)
end

return final_ok, final_err

end


-- Run the given function holding a lock on the target.
-- @param self The checker object
-- @param ip Target IP
Expand All @@ -579,14 +628,12 @@ end
-- @return The results of the function; or true in case it fails locking and
-- will retry asynchronously; or nil+err in case it fails to retry.
local function locking_target(self, ip, port, hostname, fn)
local ok, err = run_mutexed_fn(false, self, ip, port, hostname, fn)
if err == "failed to acquire lock" then
local _, terr = ngx.timer.at(0, run_mutexed_fn, self, ip, port, hostname, fn)
if terr ~= nil then
return nil, terr
end
local key = key_for(self.TARGET_LOCK, ip, port, hostname)

return true
local ok, err = run_locked(self, key, fn)

if ok == "scheduled" then
self:log(DEBUG, "target function for ", key, " was re-scheduled")
end

return ok, err
Expand Down Expand Up @@ -1119,7 +1166,12 @@ local function checker_callback(self, health_mode)
end

local list_to_check = {}
local targets = fetch_target_list(self)
local targets, err = fetch_target_list(self)
if not targets then
self:log(ERR, "checker_callback: ", err)
return
end

for _, target in ipairs(targets) do
local tgt = get_target(self, target.ip, target.port, target.hostname)
local internal_health = tgt and tgt.internal_health or nil
Expand Down