Skip to content
This repository has been archived by the owner on Jul 14, 2021. It is now read-only.

Make sync calls to FSMs safe against shutdown while a message is still in the queue #85

Merged
merged 3 commits into from
Jan 7, 2013
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 15 additions & 2 deletions apps/pushy/src/pushy_job_state.erl
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,13 @@ start_link(Job) ->
%% Look up the process for JobId via pushy_job_state_sup:get_process/1 and
%% synchronously send it the `get_job_status` all-state event.
%% Returns `not_found` when the supervisor lookup returns `not_found`.
%% NOTE(review): the two `Pid ->` lines below appear to be the removed and
%% added lines of the diff shown back to back; confirm against the real file.
get_job_state(JobId) ->
case pushy_job_state_sup:get_process(JobId) of
not_found -> not_found;
Pid -> gen_fsm:sync_send_all_state_event(Pid, get_job_status)
Pid -> safe_sync_send_all_state_event(Pid, get_job_status)
end.

%% Look up the process for JobId via pushy_job_state_sup:get_process/1 and
%% synchronously send it the `stop_job` all-state event.
%% Returns `not_found` when the supervisor lookup returns `not_found`.
%% NOTE(review): the two `Pid ->` lines below appear to be the removed and
%% added lines of the diff shown back to back; confirm against the real file.
stop_job(JobId) ->
case pushy_job_state_sup:get_process(JobId) of
not_found -> not_found;
Pid -> gen_fsm:sync_send_all_state_event(Pid, stop_job)
Pid -> safe_sync_send_all_state_event(Pid, stop_job)
end.

%%%
Expand Down Expand Up @@ -366,3 +366,16 @@ terminalize(timed_out) -> terminal;
terminalize(new) -> new;
terminalize(ready) -> ready;
terminalize(running) -> running.

%% We can end up in a race condition with sync messages where
%% the process terminates and the message is still in the queue.
%%
%% This deals with the race condition by catching the resulting
%% `{shutdown, Details}` exit and converting it to `not_found`.
%%
%% Only that exit is handled. Any other exception raised by the call
%% propagates to the caller instead of being returned as an
%% `{'EXIT', Reason}` value, which an old-style `catch` would do.
safe_sync_send_all_state_event(Pid, Message) ->
    try
        gen_fsm:sync_send_all_state_event(Pid, Message)
    catch
        exit:{shutdown, _Details} ->
            not_found
    end.
15 changes: 14 additions & 1 deletion apps/pushy/src/pushy_node_state.erl
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ rehab_interval() ->
call(NodeRef, Message) ->
case pushy_node_state_sup:get_process(NodeRef) of
Pid when is_pid(Pid) ->
gen_fsm:sync_send_all_state_event(Pid, Message, infinity);
safe_sync_send_all_state_event(Pid, Message);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not a criticism of your changes, but why do we have the infinity timeout here?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure. It was there before and I had wondered that. job_state didn't have it specified.

Should I remove it?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know why it is there, but I was experimenting with removing it in a different context and everything seemed to work fine w/o that.

The temptation of removing it and then being able to DRY up the two safe_sync_send_all_state_event routines is huge.

undefined ->
undefined
end.
Expand Down Expand Up @@ -471,3 +471,16 @@ extract_job_id(Data) ->
_ ->
invalid_job_id
end.

%% We can end up in a race condition with sync messages where
%% the process terminates and the message is still in the queue.
%%
%% This deals with the race condition by catching the resulting
%% `{shutdown, Details}` exit and converting it to `undefined`.
%%
%% Only that exit is handled. Any other exception raised by the call
%% propagates to the caller instead of being returned as an
%% `{'EXIT', Reason}` value, which an old-style `catch` would do.
%% The call is made with an `infinity` timeout.
safe_sync_send_all_state_event(Pid, Message) ->
    try
        gen_fsm:sync_send_all_state_event(Pid, Message, infinity)
    catch
        exit:{shutdown, _Details} ->
            undefined
    end.