Skip to content

Commit

Permalink
fix(timers) prevent exhausting timers in init/worker_init (Kong#57)
Browse files Browse the repository at this point in the history
  • Loading branch information
Tieske authored and AlinsRan committed Jun 2, 2023
1 parent 76c66f5 commit 45b37af
Show file tree
Hide file tree
Showing 3 changed files with 137 additions and 3 deletions.
50 changes: 48 additions & 2 deletions lib/resty/healthcheck.lua
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,52 @@ local worker_color = use_color and function(str) return ("\027["..tostring(31 +
-- Debug function
local function dump(...) print(require("pl.pretty").write({...})) end -- luacheck: ignore 211

-- Cache timers in the "init" and "init_worker" phases so we use only a single
-- timer and do not run the risk of exhausting them for large sets.
-- See https://github.com/Kong/lua-resty-healthcheck/issues/40
-- Below we temporarily use a patched version of ngx.timer.at, until we're
-- past the init and init_worker phases, after which we return to the regular
-- ngx.timer.at implementation.
--
-- The patched version accepts the same arguments as ngx.timer.at
-- (delay, callback, ...). While queueing, the delay is ignored: every queued
-- callback is executed from one shared 0-delay timer.
local ngx_timer_at do
  -- Pending calls. Each entry holds the original varargs positionally, i.e.
  -- { n = <argument count>, delay, callback, arg1, arg2, ... }.
  local callback_list = {}

  -- Callback of the single shared timer: runs all queued callbacks in the
  -- order they were queued. Does nothing when the timer fires prematurely.
  local function handler(premature)
    if premature then
      return
    end

    -- detach the queue before running it, so we iterate a stable snapshot
    local list = callback_list
    callback_list = {}

    for _, args in ipairs(list) do
      -- args[1] is the delay, args[2] the callback and args[3..n] its
      -- arguments; the callback gets a "premature" flag as first argument,
      -- just like a regular timer callback.
      local ok, err = pcall(args[2], ngx_worker_exiting(), unpack(args, 3, args.n))
      if not ok then
        ngx.log(ngx.ERR, "timer failure: ", err)
      end
    end
  end

  -- Drop-in replacement for ngx.timer.at.
  -- Returns a truthy value on success, or nil + error if the shared timer
  -- could not be created.
  ngx_timer_at = function(...)
    local phase = ngx.get_phase()
    if phase ~= "init" and phase ~= "init_worker" then
      -- we're past init/init_worker, so replace this temp function with the
      -- real-deal again, so from here on we run regular timers.
      ngx_timer_at = ngx.timer.at
      return ngx.timer.at(...)
    end

    local n = #callback_list
    -- select("#", ...) keeps the true argument count, even with nil arguments
    callback_list[n+1] = { n = select("#", ...), ... }
    if n == 0 then
      -- first one, so schedule the actual timer
      local ok, err = ngx.timer.at(0, handler)
      if not ok then
        -- scheduling failed: remove the entry again, otherwise later calls
        -- would see a non-empty queue and never schedule a timer at all
        callback_list[1] = nil
        return nil, err
      end
      return ok
    end
    return true
  end

end


local _M = {}


Expand Down Expand Up @@ -273,7 +319,7 @@ local function locking_target_list(self, fn)

local ok, err = run_fn_locked_target_list(false, self, fn)
if err == "failed to acquire lock" then
local _, terr = ngx.timer.at(0, run_fn_locked_target_list, self, fn)
local _, terr = ngx_timer_at(0, run_fn_locked_target_list, self, fn)
if terr ~= nil then
return nil, terr
end
Expand Down Expand Up @@ -528,7 +574,7 @@ end
local function locking_target(self, ip, port, hostname, fn)
local ok, err = run_mutexed_fn(false, self, ip, port, hostname, fn)
if err == "failed to acquire lock" then
local _, terr = ngx.timer.at(0, run_mutexed_fn, self, ip, port, hostname, fn)
local _, terr = ngx_timer_at(0, run_mutexed_fn, self, ip, port, hostname, fn)
if terr ~= nil then
return nil, terr
end
Expand Down
2 changes: 2 additions & 0 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,8 @@ Versioning is strictly based on [Semantic Versioning](https://semver.org/)
* fix: prevent target-list from being nil, due to async behaviour [#44](https://github.com/Kong/lua-resty-healthcheck/pull/44)
* fix: replace timer and node-wide locks with resty-timer, to prevent interval
skips [#59](https://github.com/Kong/lua-resty-healthcheck/pull/59)
* fix: do not run out of timers during init/init_worker when adding a large
number of targets [#57](https://github.com/Kong/lua-resty-healthcheck/pull/57)

### 1.3.0 (17-Jun-2020)

Expand Down
88 changes: 87 additions & 1 deletion t/02-add_target.t
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use Cwd qw(cwd);

workers(1);

plan tests => repeat_each() * (blocks() * 4) + 6;
plan tests => repeat_each() * (blocks() * 4) + 5;

my $pwd = cwd();

Expand Down Expand Up @@ -166,3 +166,89 @@ checking healthy targets: #1

--- no_error_log
checking unhealthy targets: #1



=== TEST 4: calling add_target() repeatedly does not exhaust timers
--- http_config eval
qq{
$::HttpConfig

server {
listen 2113;
location = /status {
return 200;
}
}
lua_max_pending_timers 100;

init_worker_by_lua_block {
--error("erreur")
local resty_lock = require ("resty.lock")
local we = require "resty.worker.events"
assert(we.configure{ shm = "my_worker_events", interval = 0.1 })
local healthcheck = require("resty.healthcheck")
local checker = healthcheck.new({
name = "testing",
shm_name = "test_shm",
checks = {
active = {
http_path = "/status",
healthy = {
interval = 0.1,
successes = 1,
},
unhealthy = {
interval = 0.1,
tcp_failures = 1,
http_failures = 1,
}
}
}
})

-- lock the key, so adding targets will fallback on timers
local lock = assert(resty_lock:new(checker.shm_name, {
exptime = 10, -- timeout after which lock is released anyway
timeout = 5, -- max wait time to acquire lock
}))
assert(lock:lock(checker.TARGET_LIST_LOCK))

local addr = {
127, 0, 0, 1
}
-- add 151 targets, more than lua_max_pending_timers (100) would allow as separate timers
for i = 0, 150 do
addr[4] = addr[4] + 1
if addr[4] > 255 then
addr[4] = 1
addr[3] = addr[3] + 1
if addr[3] > 255 then
addr[3] = 1
addr[2] = addr[2] + 1
if addr[2] > 255 then
addr[2] = 1
addr[1] = addr[1] + 1
end
end
end
local ok, err = assert(checker:add_target(table.concat(addr, "."), 2113, nil, true))
end
}

}

--- config
location = /t {
content_by_lua_block {
ngx.say(true)
ngx.exit(200)
}
}

--- request
GET /t
--- response_body
true
--- no_error_log
too many pending timers

0 comments on commit 45b37af

Please sign in to comment.