3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -89,6 +89,9 @@ instead `badarg`.
- Fixed a bug where an empty atom could not be created on some platforms, which broke receiving a message for a registered process from an OTP node.
- Fix a memory leak in distribution when a BEAM node would monitor a process by name.
- Fix `list_to_integer`; it was likely buggy with integers close to INT64_MAX
- Added missing support for supervisor `one_for_all` strategy.
- Supervisor now honors period and intensity options.
- Fix supervisor crash if a child fails to restart.

## [0.6.7] - Unreleased

249 changes: 232 additions & 17 deletions libs/estdlib/src/supervisor.erl
@@ -88,13 +88,28 @@
start :: {module(), atom(), [any()] | undefined},
restart :: restart(),
shutdown :: shutdown(),
type :: child_type,
type :: child_type(),
modules = [] :: [module()] | dynamic
}).
-record(state, {restart_strategy :: strategy(), children = [] :: [#child{}]}).
-record(state, {
restart_strategy = one_for_one :: strategy(),
intensity = 3 :: non_neg_integer(),
period = 5 :: pos_integer(),
restart_count = 0 :: non_neg_integer(),
restarts = [] :: [integer()],
children = [] :: [#child{}]
}).

%% Used to trim stale restarts when the 'intensity' value is large.
%% This is the number of restarts before triggering a purge of restarts
%% older than 'period', so stale restarts do not keep consuming RAM on
%% MCUs with limited memory. In the future a function could be used to
%% set a sane default for the platform (OTP uses 1000).
-define(STALE_RESTART_LIMIT, 100).

start_link(Module, Args) ->
gen_server:start_link(?MODULE, {Module, Args}, []).

start_link(SupName, Module, Args) ->
gen_server:start_link(SupName, ?MODULE, {Module, Args}, []).

@@ -119,16 +134,27 @@ count_children(Supervisor) ->
init({Mod, Args}) ->
erlang:process_flag(trap_exit, true),
case Mod:init(Args) of
{ok, {{Strategy, _Intensity, _Period}, StartSpec}} ->
State = init_state(StartSpec, #state{restart_strategy = Strategy}),
{ok, {{Strategy, Intensity, Period}, StartSpec}} ->
State = init_state(StartSpec, #state{
restart_strategy = Strategy,
intensity = Intensity,
period = Period
}),
NewChildren = start_children(State#state.children, []),
{ok, State#state{children = NewChildren}};
{ok, {#{strategy := Strategy}, StartSpec}} ->
State = init_state(StartSpec, #state{restart_strategy = Strategy}),
{ok, {#{} = SupSpec, StartSpec}} ->
Strategy = maps:get(strategy, SupSpec, one_for_one),
Intensity = maps:get(intensity, SupSpec, 3),
Period = maps:get(period, SupSpec, 5),
State = init_state(StartSpec, #state{
restart_strategy = Strategy,
intensity = Intensity,
period = Period
}),
NewChildren = start_children(State#state.children, []),
{ok, State#state{children = NewChildren}};
Error ->
{stop, {bad_return, {mod, init, Error}}}
{stop, {bad_return, {Mod, init, Error}}}
end.

-spec child_spec_to_record(child_spec()) -> #child{}.
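
For context, the tuple and map forms accepted in `init/1` above follow the standard OTP `supervisor:init/1` contract. A minimal callback module exercising the map form, with intensity and period set explicitly, might look like this (module and child names are illustrative, not part of this PR):

```erlang
-module(demo_sup).
-behaviour(supervisor).
-export([start_link/0, init/1]).

start_link() ->
    supervisor:start_link({local, ?MODULE}, ?MODULE, []).

init([]) ->
    %% Allow at most 3 restarts within any 5-second window before giving up.
    SupFlags = #{strategy => one_for_all, intensity => 3, period => 5},
    ChildSpecs = [
        #{id => worker_a, start => {worker_a, start_link, []}},
        #{id => worker_b, start => {worker_b, start_link, []}}
    ],
    {ok, {SupFlags, ChildSpecs}}.
```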
@@ -172,7 +198,7 @@ child_spec_to_record(#{id := ChildId, start := MFA} = ChildMap) ->
init_state([ChildSpec | T], State) ->
Child = child_spec_to_record(ChildSpec),
NewChildren = [Child | State#state.children],
init_state(T, #state{children = NewChildren});
init_state(T, State#state{children = NewChildren});
init_state([], State) ->
State#state{children = lists:reverse(State#state.children)}.

Expand All @@ -184,7 +210,50 @@ start_children([Child | T], StartedC) ->
start_children([], StartedC) ->
StartedC.

restart_child(Pid, Reason, State) ->
restart_child(Pid, Reason, #state{restart_strategy = one_for_one} = State) ->

Review comment (Collaborator): This function's name is confusing. Can we call it something like `handle_child_exit` instead, so we won't confuse it with `restart_child/2`? Also, we should check the `restart_strategy` only in the case where the child is not being terminated, to avoid duplication.

Reply (Collaborator Author): Yes, I found the name confusing too, since `terminate/2` was added to main. I think making it clear that this is the handler for child exits is a very good idea.

(A rough sketch of that restructuring follows the one_for_one clause below.)

case lists:keyfind(Pid, #child.pid, State#state.children) of
false ->
{ok, State};
#child{restart = {terminating, temporary, From}} ->
gen_server:reply(From, ok),
NewChildren = lists:keydelete(Pid, #child.pid, State#state.children),
{ok, State#state{children = NewChildren}};
#child{restart = {terminating, Restart, From}} = Child ->
gen_server:reply(From, ok),
NewChildren = lists:keyreplace(Pid, #child.pid, State#state.children, Child#child{
pid = undefined, restart = Restart
}),
{ok, State#state{children = NewChildren}};
#child{} = Child ->
case should_restart(Reason, Child#child.restart) of
true ->
case add_restart(State) of
{ok, State1} ->
case try_start(Child) of
{ok, NewPid, _Result} ->
NewChild = Child#child{pid = NewPid},
Children = lists:keyreplace(
Pid, #child.pid, State1#state.children, NewChild
),
{ok, State1#state{children = Children}};
{error, _} ->
erlang:send_after(
50, self(), {try_again_restart, Child#child.id}
),
{ok, State1}
end;
{shutdown, State1} ->
RemainingChildren = lists:keydelete(
Pid, #child.pid, State#state.children
),
{shutdown, State1#state{children = RemainingChildren}}
end;
false ->
Children = lists:keydelete(Pid, #child.pid, State#state.children),
{ok, State#state{children = Children}}
end
end;
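
A rough sketch of the restructuring suggested in the thread above: dispatch on whether the child is being terminated before consulting the restart strategy, so the terminating cases are not duplicated per strategy. The name `handle_child_exit` is the reviewer's proposal, and `handle_terminating_child/3` and `handle_strategy/5` are hypothetical helpers, not functions from this PR:

```erlang
handle_child_exit(Pid, Reason, State) ->
    case lists:keyfind(Pid, #child.pid, State#state.children) of
        false ->
            {ok, State};
        #child{restart = {terminating, _Restart, _From}} = Child ->
            %% A child we are deliberately terminating is handled the same
            %% way regardless of the supervisor's restart strategy.
            handle_terminating_child(Pid, Child, State);
        #child{} = Child ->
            %% Only an unexpected exit needs to consult the strategy.
            handle_strategy(State#state.restart_strategy, Pid, Reason, Child, State)
    end.
```
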
restart_child(Pid, Reason, #state{restart_strategy = one_for_all} = State) ->
case lists:keyfind(Pid, #child.pid, State#state.children) of
false ->
{ok, State};
@@ -201,13 +270,21 @@ restart_child(Pid, Reason, State) ->
#child{} = Child ->
case should_restart(Reason, Child#child.restart) of
true ->
case try_start(Child) of
{ok, NewPid, _Result} ->
NewChild = Child#child{pid = NewPid},
Children = lists:keyreplace(
Pid, #child.pid, State#state.children, NewChild
case add_restart(State) of
{ok, State1} ->
Siblings = lists:keydelete(Pid, #child.pid, State#state.children),
case restart_many_children(Child, Siblings) of
{ok, NewChildren} ->
{ok, State1#state{children = NewChildren}};
{ok, NewChildren, RetryID} ->
erlang:send_after(50, self(), {try_again_restart, RetryID}),

Review comment (Collaborator): It's a different logic, but I would favor a timeout return for the gen_server over `send_after/3`, with a queue of restarts we need to tackle.

Reply (Collaborator Author): A timeout does make more sense here; that is closer to what I originally envisioned. I ended up implementing most of my changes differently internally than OTP does, mostly for the sake of simplicity and/or a smaller memory footprint; I just wanted the behaviors to be the same for the user. OTP does not use any delay between restart attempts. Part of the reason I used a send_after was to give a slight delay that might allow a gc or other external change to improve the chances of a successful restart of the child, but maybe we should just immediately (re)queue all restart attempts.

Reply (Collaborator Author): Sadly, our current gen_server implementation does not support `{ok, State, {timeout, Time, Message}}` returns from callbacks. I was thinking that adding hibernation support might be nice (especially for the benefit of supervisors), but I think supporting timeout actions in returns is even more important.

Reply (Collaborator): I didn't know about the timeout tuple, which didn't exist when I learnt about gen_server. We should indeed implement it, and meanwhile we could use send_after in this PR unless the timeout tuple is merged first.

Reply (Collaborator Author): It looks perfect for this situation, but I am struggling to get a test to pass on OTP using it. I did all of this work with test-driven development, writing the test for OTP first and then making sure my implementation passes as expected. It looks like a fairly simple improvement, once I understand how it's supposed to work ;-)

(A sketch of the timeout-return alternative follows this hunk.)

{ok, State1#state{children = NewChildren}}
end;
{shutdown, State1} ->
RemainingChildren = lists:keydelete(
Pid, #child.pid, State#state.children
),
{ok, State#state{children = Children}}
{shutdown, State1#state{children = RemainingChildren}}
end;
false ->
Children = lists:keydelete(Pid, #child.pid, State#state.children),
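
For reference on the thread above: stock `gen_server` already supports a plain integer timeout in callback returns; `{noreply, State, Timeout}` delivers the atom `timeout` to `handle_info/2` if no other message arrives within `Timeout` milliseconds. A minimal sketch of how the retry-queue idea could lean on that, where `pending_restarts` and `retry_one/2` are hypothetical and not part of this PR:

```erlang
%% Sketch only. Caveat: any other incoming message cancels a pending
%% inactivity timeout, so every callback return must re-arm it (return the
%% third element again) while the retry queue is non-empty.
handle_info(timeout, #state{pending_restarts = [Id | Rest]} = State) ->
    State1 = retry_one(Id, State#state{pending_restarts = Rest}),
    case State1#state.pending_restarts of
        [] -> {noreply, State1};
        _ -> {noreply, State1, 50}
    end;
handle_info(timeout, State) ->
    %% Spurious timeout with nothing queued: ignore it.
    {noreply, State}.
```
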
@@ -309,6 +386,26 @@ handle_info({ensure_killed, Pid}, State) ->
exit(Pid, kill),
{noreply, State}
end;
handle_info({try_again_restart, Id}, State) ->
Child = lists:keyfind(Id, #child.id, State#state.children),

Review comment (Collaborator): The child can be gone here.

(A guarded variant of this handler is sketched after the handle_info clauses below.)

case add_restart(State) of
{ok, State1} ->
case try_start(Child) of
{ok, NewPid, _Result} ->
UpdatedChildren = lists:keyreplace(
Id, #child.id, State1#state.children, Child#child{pid = NewPid}
),
{noreply, State1#state{children = UpdatedChildren}};
{error, {_, _}} ->
erlang:send_after(50, self(), {try_again_restart, Id}),
{noreply, State1}
end;
{shutdown, State1} ->
RemainingChildren = lists:keydelete(
Id, #child.id, State1#state.children
),
{stop, shutdown, State1#state{children = RemainingChildren}}
end;
handle_info(_Msg, State) ->
%TODO: log unexpected message
{noreply, State}.
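
The thread above points out that the child looked up in the `try_again_restart` handler can already be gone when the retry message arrives. A hedged sketch of that handler with the missing-child guard added, otherwise following the logic in this hunk:

```erlang
handle_info({try_again_restart, Id}, State) ->
    case lists:keyfind(Id, #child.id, State#state.children) of
        false ->
            %% The child was removed or terminated before the retry fired.
            {noreply, State};
        #child{} = Child ->
            case add_restart(State) of
                {ok, State1} ->
                    case try_start(Child) of
                        {ok, NewPid, _Result} ->
                            UpdatedChildren = lists:keyreplace(
                                Id, #child.id, State1#state.children, Child#child{pid = NewPid}
                            ),
                            {noreply, State1#state{children = UpdatedChildren}};
                        {error, _} ->
                            erlang:send_after(50, self(), {try_again_restart, Id}),
                            {noreply, State1}
                    end;
                {shutdown, State1} ->
                    RemainingChildren = lists:keydelete(Id, #child.id, State1#state.children),
                    {stop, shutdown, State1#state{children = RemainingChildren}}
            end
    end;
```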
@@ -321,9 +418,16 @@ terminate(_Reason, #state{children = Children} = State) ->

loop_terminate([#child{pid = undefined} | Tail], AccRemaining) ->
loop_terminate(Tail, AccRemaining);
loop_terminate([#child{pid = {restarting, _}} | Tail], AccRemaining) ->

Review comment (Collaborator): If we're using restarting in the pid, we could also put terminating there as well (?). Also, we should type this.

Reply (Collaborator Author): I did not see anywhere that OTP was using {terminating, pid()}, but for our implementation we might want that for internal tracking when children are using a timeout shutdown. It wouldn't make sense for brutal_kill, since those should have pid set to undefined (or the child removed entirely) immediately. Also, yes, it looks like I forgot to type the pid in the child record, and a few of the added supervisor record fields too.

Reply (Collaborator Author): Regarding "we might want that for internal tracking when children are using timeout shutdown": actually this wouldn't make sense either; we are already tracking this in #child.restart.

loop_terminate(Tail, AccRemaining);
loop_terminate([#child{pid = Pid} = Child | Tail], AccRemaining) when is_pid(Pid) ->
do_terminate(Child),
loop_terminate(Tail, [Pid | AccRemaining]);
case is_process_alive(Pid) of

Review comment (Collaborator): I'm not sure about this optimization. The process could be gone between here and the exit message, and it's not worth it unless we need to ensure it's gone.

Reply (Collaborator Author): At some point we do need to ensure it's gone, or we can get stuck indefinitely in loop_wait_termination/1. I encountered this while testing after adding intensity/period support. It is possible to get stuck in loop_wait_termination, preventing the supervisor from shutting down after the maximum intensity has been reached.

Reply (Collaborator Author, @UncleGrumpy, Nov 7, 2025): It's a little expensive, but because this only happens during shutdown it shouldn't matter. It would not be a terrible idea to iterate through the pids and be sure they are still alive before each tail call of loop_wait_termination.

Reply (Collaborator): What I meant is: if the child is gone, we must have received the DOWN message, so we don't need to query the process table to see if it's still there.

(A sketch of such an alive check follows the loop_terminate clauses below.)

true ->
do_terminate(Child),
loop_terminate(Tail, [Pid | AccRemaining]);
false ->
loop_terminate(Tail, AccRemaining)
end;
loop_terminate([], AccRemaining) ->
AccRemaining.
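
`loop_wait_termination/1` is not shown in this diff, so the following is only a sketch under the assumption that it waits for `'EXIT'` messages from the pid list accumulated above (the supervisor traps exits). It illustrates the periodic alive check the author describes; the reviewer's point that a received exit already removes the pid still stands:

```erlang
loop_wait_termination([]) ->
    ok;
loop_wait_termination(Pids) ->
    receive
        {'EXIT', Pid, _Reason} ->
            loop_wait_termination(lists:delete(Pid, Pids))
    after 1000 ->
        %% Drop pids that died without us consuming their exit, so a missed
        %% message cannot leave the supervisor stuck here forever.
        loop_wait_termination([P || P <- Pids, is_process_alive(P)])
    end.
```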

@@ -364,6 +468,117 @@ try_start(#child{start = {M, F, Args}} = Record) ->
{error, {{'EXIT', Error}, Record}}
end.

add_restart(
#state{
intensity = Intensity, period = Period, restart_count = RestartCount, restarts = Restarts
} = State
) ->
Now = erlang:monotonic_time(millisecond),
Threshold = Now - Period * 1000,
case can_restart(Intensity, Threshold, Restarts, RestartCount) of
{true, RestartCount1, Restarts1} ->
{ok, State#state{
restarts = Restarts1 ++ [Now], restart_count = RestartCount1 + 1
}};
{false, RestartCount1, Restarts1} ->
% TODO: log supervisor shutdown due to maximum intensity exceeded
{shutdown, State#state{
restarts = Restarts1, restart_count = RestartCount1
}}
end.

can_restart(0, _, _, _) ->
{false, 0, []};
can_restart(_, _, _, 0) ->
{true, 0, []};
can_restart(Intensity, Threshold, Restarts, RestartCount) when
RestartCount >= ?STALE_RESTART_LIMIT
->
{NewCount, Restarts1} = trim_expired_restarts(Threshold, lists:sort(Restarts)),
can_restart(Intensity, Threshold, Restarts1, NewCount);
can_restart(Intensity, Threshold, [Restart | _] = Restarts, RestartCount) when
RestartCount >= Intensity andalso Restart < Threshold
->
{NewCount, Restarts1} = trim_expired_restarts(Threshold, lists:sort(Restarts)),
can_restart(Intensity, Threshold, Restarts1, NewCount);
can_restart(Intensity, _, Restarts, RestartCount) when RestartCount >= Intensity ->
{false, RestartCount, Restarts};
can_restart(Intensity, _, Restarts, RestartCount) when RestartCount < Intensity ->
{true, RestartCount, Restarts}.

trim_expired_restarts(Threshold, [Restart | Restarts]) when Restart < Threshold ->
trim_expired_restarts(Threshold, Restarts);
trim_expired_restarts(_Threshold, Restarts) ->
{length(Restarts), Restarts}.
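
A quick worked example of the bookkeeping above: with `intensity = 3` and `period = 5`, a fourth restart attempt while three restarts are already recorded inside the five-second window makes `can_restart/4` return `{false, ...}`, so `add_restart/1` returns `{shutdown, ...}` and the supervisor shuts down. Likewise, `trim_expired_restarts(1000, [900, 950, 1100, 1200])` drops the two stale timestamps and returns `{2, [1100, 1200]}`, the new restart count together with the surviving entries.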

restart_many_children(#child{pid = Pid} = Child, Children) ->
Siblings = lists:keydelete(Pid, #child.pid, Children),
{ok, Children1} = terminate_many_children(Siblings, [Child#child{pid = {restarting, Pid}}]),
do_restart_children(Children1, [], []).

terminate_many_children([], NewChildren) ->
{ok, lists:reverse(NewChildren)};
terminate_many_children([Child | Children], NewChildren) ->
case Child of
#child{restart = {terminating, _Restart, From}} = Child when is_pid(From) ->
terminate_many_children(Children, NewChildren);
#child{pid = undefined, restart = temporary} = Child ->
terminate_many_children(Children, NewChildren);
#child{pid = Pid, restart = temporary} = Child when is_pid(Pid) ->
do_terminate(Child),
terminate_many_children(Children, NewChildren);
#child{pid = undefined} = Child ->
terminate_many_children(Children, [Child | NewChildren]);
#child{pid = Pid} = Child when is_pid(Pid) ->
do_terminate(Child),
terminate_many_children(Children, [
Child#child{pid = {restarting, Pid}} | NewChildren
])
end.

do_restart_children([], NewChildren, []) ->
{ok, lists:reverse(NewChildren)};
do_restart_children([], NewChildren, [RetryChild | T] = RetryChildren) ->
if
length(T) =:= 0 ->
{ok, lists:reverse(NewChildren), RetryChild#child.id};
true ->
ok = differed_try_again(RetryChildren),
{ok, lists:reverse(NewChildren)}
end;
do_restart_children([#child{pid = Pid} = Child | Children], NewChildren, RetryChildren) ->
case Pid of
{restarting, _} ->
case try_start(Child) of
{ok, Pid1, {ok, Pid1}} ->
do_restart_children(
Children, [Child#child{pid = Pid1} | NewChildren], RetryChildren
);
{ok, Pid1, {ok, Pid1, _Result}} ->
do_restart_children(
Children, [Child#child{pid = Pid1} | NewChildren], RetryChildren
);
{ok, undefined, {ok, undefined}} ->
do_restart_children(
Children, [Child#child{pid = undefined} | NewChildren], RetryChildren
);
{error, _} ->
do_restart_children(Children, NewChildren, [Child | RetryChildren])
end;
_ ->
% retain previous ignore children without starting them
do_restart_children(Children, [Child | NewChildren], RetryChildren)
end.

%% Schedules "try again" restarts at 50ms intervals when multiple children have failed to restart
%% on the first attempt. This is an accumulated (reverse start order) list, so the children that
%% should start last get the longest delay before sending the try_again_restart request.
differed_try_again([]) ->
ok;
differed_try_again([Child | Children] = RetryChildren) ->
erlang:send_after(50 * length(RetryChildren), self(), {try_again_restart, Child#child.id}),
differed_try_again(Children).
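
For example, if three children failed their first restart and were accumulated in reverse start order as `[C3, C2, C1]`, the scheduled delays are 150 ms for `C3`, 100 ms for `C2` and 50 ms for `C1`, so the child that should start first is also retried first.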

child_to_info(#child{id = Id, pid = Pid, type = Type, modules = Modules}) ->
Child =
case Pid of