Split up the compiler DAG
This is another tricky commit towards replacing the current module
analysis with EPP. The compiler DAG is now being shared across multiple
applications being compiled, rather than a per-application basis, which
promises to allow better ordering, parallelism, and more thorough
invalidation of runtime dependencies when they are modified.

This however required changes:
- The compiler DAG is no longer private to `rebar_compiler`, and has
  been extracted to the `rebar_compiler_dag` module
- The compiler DAG is now started by the `rebar_prv_compile` module,
  which oversees the calls to `rebar_compiler` for each OTP application
- The compiler DAG has been refactored to use a "dirty flag" to know if
  it was modified, rather than just tracking modifications in a
  functional manner, since the scope change (going multi-app) makes it
  impossible to cleanly use the functional approach without much larger
  changes
- The DAG used to be cached within each OTP application. This is no
  longer possible since it is shared. Instead the DAG is stored in the
  state's deps_dir, which allows caches to be cleanly split between
  regular apps for the user's project and plugins
- The DAG supported a "label" mode that was used to store distinct DAGs
  for extra_src_dir runs and regular modules; this label is now used
  (and extended to `rebar_prv_compile` internals) to distinguish between
  "compile runs", such as "project_apps", or just "apps" (deps). The
  label is optional (i.e. not used by plugins which have no such need)
- The extra_src_dirs for each app are now compiled using the main app's
  DAG, but the run takes place later in the compilation process. This
  may need changing to detect and prevent dependencies from src_dirs
  into extra_src_dirs, but this should not technically be a problem for
  runtime anyway.
- Reworked the support for extra_src_dirs that are at the root of an
  umbrella project (and therefore do not belong to any single app) to
  use the new structure, also as part of the project_apps DAG.

All tests keep passing, and this puts us in a better place to use EPP
with cross-app support in the near future.
ferd committed Jan 31, 2020
1 parent 8ff256d commit 3b3201a
ok | {ok, [string()]} | {ok, [string()], [string()]}.
-callback clean([file:filename()], rebar_app_info:t()) -> _.

%% Version tag written into every persisted DAG. restore_dag/5 only accepts
%% a stored term whose `vsn' field matches this value.
-define(DAG_VSN, 2).
%% Base name and extension of the on-disk DAG file (see dag_file/3).
-define(DAG_ROOT, "source").
-define(DAG_EXT, ".dag").
%% A persisted vertex, in the shape returned by digraph:vertex/2.
-type dag_v() :: {digraph:vertex(), term()} | 'false'.
%% A persisted edge, in the shape returned by digraph:edge/2. store_dag/5
%% saves those results as-is and restore_dag/5 matches them as
%% `{_, V1, V2, _}', so the type is the 4-tuple rather than a vertex pair.
-type dag_e() :: {digraph:edge(), digraph:vertex(), digraph:vertex(),
                  digraph:label()} | 'false'.
%% Payload of a stored DAG: vertices, edges, and the include directories
%% that were in effect when the graph was saved.
-type dag() :: {list(dag_v()), list(dag_e()), list(string())}.
%% Term serialized to the DAG file by store_dag/5.
-record(dag, {vsn = ?DAG_VSN :: pos_integer(),
              info = {[], [], []} :: dag()}).

%% Regular-expression prefix whose negative lookahead rejects names that
%% start with "._". find_source_files/4 spells out the same prefix inline.
-define(RE_PREFIX, "^(?!\\._)").

compile_all(Compilers, AppInfo) ->
-spec compile_all([{module(), digraph:graph()}, ...], rebar_app_info:t()) -> ok
; ([module(), ...], rebar_app_info:t()) -> ok.
compile_all(DAGs, AppInfo) when is_tuple(hd(DAGs)) -> % > 3.13.0
lists:foreach(fun({Compiler, G}) ->
run(G, Compiler, AppInfo),
%% TODO: disable default recursivity in extra_src_dirs compiling to
%% prevent compiling sample modules in _SUITE_data/ directories
%% in CT.
ExtraApps = annotate_extras(AppInfo),
[run(G, Compiler, ExtraAppInfo) || ExtraAppInfo <- ExtraApps],
compile_all(Compilers, AppInfo) -> % =< 3.13.0 interface; plugins use this!
%% Support the old-style API by re-declaring a local DAG for the
%% compile steps needed.
lists:foreach(fun(Compiler) ->
OutDir = rebar_app_info:out_dir(AppInfo),
G = rebar_compiler_dag:init(OutDir, Compiler, undefined, []),
compile_all([{Compiler, G}], AppInfo),
rebar_compiler_dag:maybe_store(G, OutDir, Compiler, undefined, []),
end, Compilers).

prepare_compiler_env(AppInfo) ->
EbinDir = rebar_utils:to_list(rebar_app_info:ebin_dir(AppInfo)),
%% Make sure that outdir is on the path
ok = rebar_file_utils:ensure_dir(EbinDir),
Expand All @@ -51,15 +68,9 @@ compile_all(Compilers, AppInfo) ->
%% called here for clarity as it's required by both opts_changed/2
%% and erl_compiler_opts_set/0 in needed_files
_ = code:ensure_loaded(compile),

lists:foreach(fun(CompilerMod) ->
run(CompilerMod, AppInfo, undefined),
run_on_extra_src_dirs(CompilerMod, AppInfo,
fun(Mod, App) -> run(Mod, App, "extra") end)
end, Compilers),

run(CompilerMod, AppInfo, Label) ->
run(G, CompilerMod, AppInfo) ->
#{src_dirs := SrcDirs,
include_dirs := InclDirs,
src_ext := SrcExt,
Expand All @@ -72,12 +83,14 @@ run(CompilerMod, AppInfo, Label) ->
AbsInclDirs = [filename:join(BaseDir, InclDir) || InclDir <- InclDirs],
FoundFiles = find_source_files(BaseDir, SrcExt, SrcDirs, BaseOpts),

OutDir = rebar_app_info:out_dir(AppInfo),
AbsSrcDirs = [filename:join(BaseDir, SrcDir) || SrcDir <- SrcDirs],
G = init_dag(CompilerMod, AbsInclDirs, AbsSrcDirs, FoundFiles, OutDir, EbinDir, Label),
{{FirstFiles, FirstFileOpts}, {RestFiles, Opts}} = CompilerMod:needed_files(G, FoundFiles,
Mappings, AppInfo),
true = digraph:delete(G),

InDirs = lists:usort(AbsInclDirs ++ AbsSrcDirs),

rebar_compiler_dag:prune(G, AbsSrcDirs, EbinDir, FoundFiles),
rebar_compiler_dag:update(G, CompilerMod, InDirs, FoundFiles),
{{FirstFiles, FirstFileOpts},
{RestFiles, Opts}} = CompilerMod:needed_files(G, FoundFiles, Mappings, AppInfo),

compile_each(FirstFiles, FirstFileOpts, BaseOpts, Mappings, CompilerMod),
case RestFiles of
Expand Down Expand Up @@ -167,20 +180,19 @@ compile_queue(Targets, Pids, Opts, Config, Outs, CompilerMod) ->
clean(Compilers, AppInfo) ->
lists:foreach(fun(CompilerMod) ->
clean_(CompilerMod, AppInfo, undefined),
run_on_extra_src_dirs(CompilerMod, AppInfo,
fun(Mod, App) -> clean_(Mod, App, "extra") end)
Extras = annotate_extras(AppInfo),
[clean_(CompilerMod, ExtraApp, "extra") || ExtraApp <- Extras]
end, Compilers).

clean_(CompilerMod, AppInfo, Label) ->
clean_(CompilerMod, AppInfo, _Label) ->
#{src_dirs := SrcDirs,
src_ext := SrcExt} = CompilerMod:context(AppInfo),
BaseDir = rebar_app_info:dir(AppInfo),
Opts = rebar_app_info:opts(AppInfo),
EbinDir = rebar_app_info:ebin_dir(AppInfo),

FoundFiles = find_source_files(BaseDir, SrcExt, SrcDirs, Opts),
CompilerMod:clean(FoundFiles, AppInfo),
rebar_file_utils:rm_rf(dag_file(CompilerMod, EbinDir, Label)).

-spec needs_compile(filename:all(), extension(), [{extension(), file:dirname()}]) -> boolean().
needs_compile(Source, OutExt, Mappings) ->
Expand All @@ -190,30 +202,23 @@ needs_compile(Source, OutExt, Mappings) ->
Target = filename:join(OutDir, BaseName++OutExt),
filelib:last_modified(Source) > filelib:last_modified(Target).

run_on_extra_src_dirs(CompilerMod, AppInfo, Fun) ->
annotate_extras(AppInfo) ->
ExtraDirs = rebar_dir:extra_src_dirs(rebar_app_info:opts(AppInfo), []),
run_on_extra_src_dirs(ExtraDirs, CompilerMod, AppInfo, Fun).

run_on_extra_src_dirs([], _CompilerMod, _AppInfo, _Fun) ->
run_on_extra_src_dirs([Dir | Rest], CompilerMod, AppInfo, Fun) ->
case filelib:is_dir(filename:join(rebar_app_info:dir(AppInfo), Dir)) of
true ->
OldSrcDirs = rebar_app_info:get(AppInfo, src_dirs, ["src"]),
AppDir = rebar_app_info:dir(AppInfo),
EbinDir = filename:join(rebar_app_info:out_dir(AppInfo), Dir),
AppInfo1 = rebar_app_info:ebin_dir(AppInfo, EbinDir),
AppInfo2 = rebar_app_info:set(AppInfo1, src_dirs, [Dir]),
AppInfo3 = rebar_app_info:set(AppInfo2, extra_src_dirs, OldSrcDirs),
AppInfo4 = add_to_includes( % give access to .hrl in app's src/
[filename:join([AppDir, D]) || D <- OldSrcDirs]
Fun(CompilerMod, AppInfo4);
_ ->
OldSrcDirs = rebar_app_info:get(AppInfo, src_dirs, ["src"]),
AppDir = rebar_app_info:dir(AppInfo),
lists:map(fun(Dir) ->
EbinDir = filename:join(rebar_app_info:out_dir(AppInfo), Dir),
AppInfo1 = rebar_app_info:ebin_dir(AppInfo, EbinDir),
AppInfo2 = rebar_app_info:set(AppInfo1, src_dirs, [Dir]),
AppInfo3 = rebar_app_info:set(AppInfo2, extra_src_dirs, OldSrcDirs),
add_to_includes( % give access to .hrl in app's src/
[filename:join([AppDir, D]) || D <- OldSrcDirs]
run_on_extra_src_dirs(Rest, CompilerMod, AppInfo, Fun).
[ExtraDir || ExtraDir <- ExtraDirs,
filelib:is_dir(filename:join(AppDir, ExtraDir))]

%% These functions are here for the ultimate goal of getting rid of
%% rebar_base_compiler. This can't be done because of existing plugins.
Expand All @@ -233,7 +238,7 @@ format_error_source(Path, Opts) ->
report(Messages) ->

%% private functions
%%% private functions

find_source_files(BaseDir, SrcExt, SrcDirs, Opts) ->
SourceExtRe = "^(?!\\._).*\\" ++ SrcExt ++ [$$],
Expand All @@ -242,160 +247,6 @@ find_source_files(BaseDir, SrcExt, SrcDirs, Opts) ->
rebar_utils:find_files_in_dirs([filename:join(BaseDir, SrcDir)], SourceExtRe, Recursive)
end, SrcDirs).

%% @private generate the name for the DAG based on the compiler module and
%% a custom label, both of which are used to prevent various compiler runs
%% from clobbering each other. The label `undefined' is kept for a default
%% run of the compiler, to keep in line with previous versions of the file.
dag_file(CompilerMod, Dir, undefined) ->
filename:join([rebar_dir:local_cache_dir(Dir), CompilerMod,
dag_file(CompilerMod, Dir, Label) ->
filename:join([rebar_dir:local_cache_dir(Dir), CompilerMod,
?DAG_ROOT ++ "_" ++ Label ++ ?DAG_EXT]).

%% private graph functions

%% Get dependency graph of given Erls files and their dependencies (header files,
%% parse transforms, behaviours etc.) located in their directories or given
%% InclDirs. Note that last modification times stored in vertices already respect
%% dependencies induced by given graph G.
init_dag(Compiler, InclDirs, SrcDirs, Erls, Dir, EbinDir, Label) ->
G = digraph:new([acyclic]),
try restore_dag(Compiler, G, InclDirs, Dir, Label)
_:_ ->
?WARN("Failed to restore ~ts file. Discarding it.~n", [dag_file(Compiler, Dir, Label)]),
file:delete(dag_file(Compiler, Dir, Label))
Dirs = lists:usort(InclDirs ++ SrcDirs),
%% A source file may have been renamed or deleted. Remove it from the graph
%% and remove any beam file for that source if it exists.
Modified = maybe_rm_beams_and_edges(G, EbinDir, Erls),
Modified1 = lists:foldl(update_dag_fun(G, Compiler, Dirs), Modified, Erls),
if Modified1 -> store_dag(Compiler, G, InclDirs, Dir, Label);
not Modified1 -> ok

maybe_rm_beams_and_edges(G, Dir, Files) ->
Vertices = digraph:vertices(G),
case lists:filter(fun(File) ->
case filename:extension(File) =:= ".erl" of
true ->
maybe_rm_beam_and_edge(G, Dir, File);
false ->
end, lists:sort(Vertices) -- lists:sort(Files)) of
[] ->
_ ->

maybe_rm_beam_and_edge(G, OutDir, Source) ->
%% This is NOT a double check; it is the only check that the source file is actually gone
case filelib:is_regular(Source) of
true ->
%% Actually exists, don't delete
false ->
Target = target_base(OutDir, Source) ++ ".beam",
?DEBUG("Source ~ts is gone, deleting previous beam file if it exists ~ts", [Source, Target]),
digraph:del_vertex(G, Source),

%% @private Build the extension-less artifact path for `Source' inside
%% `OutDir': the directory part of `Source' and a trailing ".erl" are
%% dropped, and what is left is joined onto `OutDir'. Callers append the
%% artifact extension themselves (e.g. ".beam").
target_base(OutDir, Source) ->
    ModuleName = filename:basename(Source, ".erl"),
    filename:join(OutDir, ModuleName).

restore_dag(Compiler, G, InclDirs, Dir, Label) ->
case file:read_file(dag_file(Compiler, Dir, Label)) of
{ok, Data} ->
% Since externally passed InclDirs can influence dependency graph (see
% modify_dag), we have to check here that they didn't change.
#dag{vsn=?DAG_VSN, info={Vs, Es, InclDirs}} =
fun({V, LastUpdated}) ->
digraph:add_vertex(G, V, LastUpdated)
end, Vs),
fun({_, V1, V2, _}) ->
digraph:add_edge(G, V1, V2)
end, Es);
{error, _} ->

%% @private Persist the graph `G' to the DAG file for this compiler, output
%% directory and label. Every vertex and edge is snapshotted with
%% digraph:vertex/2 and digraph:edge/2, bundled with `InclDirs' into a
%% `#dag{}' record (carrying the current ?DAG_VSN), and written as a
%% compressed external term. Returns the result of file:write_file/2; a
%% failure to create the parent directory crashes with a badmatch.
store_dag(Compiler, G, InclDirs, Dir, Label) ->
    Vertices = [digraph:vertex(G, V) || V <- digraph:vertices(G)],
    Edges = [digraph:edge(G, E) || E <- digraph:edges(G)],
    DagPath = dag_file(Compiler, Dir, Label),
    ok = filelib:ensure_dir(DagPath),
    Payload = term_to_binary(#dag{info = {Vertices, Edges, InclDirs}},
                             [{compressed, 2}]),
    file:write_file(DagPath, Payload).

update_dag(G, Compiler, Dirs, Source) ->
case digraph:vertex(G, Source) of
{_, LastUpdated} ->
case filelib:last_modified(Source) of
0 ->
%% The file doesn't exist anymore,
%% erase it from the graph.
%% All the edges will be erased automatically.
digraph:del_vertex(G, Source),
LastModified when LastUpdated < LastModified ->
modify_dag(G, Compiler, Source, LastModified, filename:dirname(Source), Dirs);
_ ->
Modified = lists:foldl(
update_dag_fun(G, Compiler, Dirs),
false, digraph:out_neighbours(G, Source)),
MaxModified = update_max_modified_deps(G, Source),
case Modified orelse MaxModified > LastUpdated of
true -> modified;
false -> unmodified
false ->
modify_dag(G, Compiler, Source, filelib:last_modified(Source), filename:dirname(Source), Dirs)

modify_dag(G, Compiler, Source, LastModified, SourceDir, Dirs) ->
AbsIncls = Compiler:dependencies(Source, SourceDir, Dirs),
digraph:add_vertex(G, Source, LastModified),
digraph:del_edges(G, digraph:out_edges(G, Source)),
fun(Incl) ->
update_dag(G, Compiler, Dirs, Incl),
digraph:add_edge(G, Source, Incl)
end, AbsIncls),

update_dag_fun(G, Compiler, Dirs) ->
fun(Erl, Modified) ->
case update_dag(G, Compiler, Dirs, Erl) of
modified -> true;
unmodified -> Modified

update_max_modified_deps(G, Source) ->
MaxModified =
lists:foldl(fun(File, Acc) ->
case digraph:vertex(G, File) of
{_, MaxModified} when MaxModified > Acc ->
_ ->
end, 0, [Source | digraph:out_neighbours(G, Source)]),
digraph:add_vertex(G, Source, MaxModified),

add_to_includes(AppInfo, Dirs) ->
Opts = rebar_app_info:opts(AppInfo),
List = rebar_opts:get(Opts, erl_opts, []),
Expand Down

