Skip to content

Commit 0952780

Browse files
authored
Merge pull request #2322 from max-au/max-au/cache-epp-deps
rebar3 Erlang compiler: performance improvements
2 parents a5bfc23 + 8fca720 commit 0952780

File tree

3 files changed

+205
-47
lines changed

3 files changed

+205
-47
lines changed

src/rebar_compiler_dag.erl

Lines changed: 76 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -7,17 +7,19 @@
77

88
-include("rebar.hrl").
99

10-
-define(DAG_VSN, 3).
10+
-define(DAG_VSN, 4).
1111
-define(DAG_ROOT, "source").
1212
-define(DAG_EXT, ".dag").
1313

14-
-type dag_v() :: {digraph:vertex(), term()} | 'false'.
15-
-type dag_e() :: {digraph:vertex(), digraph:vertex()}.
16-
-type critical_meta() :: term(). % if this changes, the DAG is invalid
17-
-type dag_rec() :: {list(dag_v()), list(dag_e()), critical_meta()}.
18-
-type dag() :: digraph:graph().
14+
-type critical_meta() :: term().
15+
1916
-record(dag, {vsn = ?DAG_VSN :: pos_integer(),
20-
info = {[], [], []} :: dag_rec()}).
17+
meta :: critical_meta(),
18+
vtab :: notable | [tuple()],
19+
etab :: notable | [tuple()],
20+
ntab :: notable | [tuple()]}).
21+
22+
-type dag() :: digraph:graph().
2123

2224
%% @doc You should initialize one DAG per compiler module.
2325
%% `CritMeta' is any contextual information that, if it is found to change,
@@ -105,13 +107,46 @@ filter_prefix(G, [{App, Out} | AppTail] = AppPaths, [File | FTail]) ->
105107
filter_prefix(G, AppPaths, FTail)
106108
end.
107109

110+
finalise_populate_sources(_G, _InDirs, Waiting) when Waiting =:= #{} ->
111+
ok;
112+
finalise_populate_sources(G, InDirs, Waiting) ->
113+
%% wait for all deps to complete
114+
receive
115+
{deps, Pid, AbsIncls} ->
116+
{Status, Source} = maps:get(Pid, Waiting),
117+
%% the file hasn't been visited yet; set it to existing, but with
118+
%% a last modified value that's null so it gets updated to something new.
119+
[digraph:add_vertex(G, Src, 0) || Src <- AbsIncls,
120+
digraph:vertex(G, Src) =:= false],
121+
%% drop edges from deps that aren't included!
122+
[digraph:del_edge(G, Edge) || Status == old,
123+
Edge <- digraph:out_edges(G, Source),
124+
{_, _Src, Path, _Label} <- [digraph:edge(G, Edge)],
125+
not lists:member(Path, AbsIncls)],
126+
%% Add the rest
127+
[digraph:add_edge(G, Source, Incl) || Incl <- AbsIncls],
128+
%% mark the digraph dirty when there is any change in
129+
%% dependencies, for any application in the project
130+
mark_dirty(G),
131+
finalise_populate_sources(G, InDirs, Waiting);
132+
{'DOWN', _MRef, process, Pid, normal} ->
133+
finalise_populate_sources(G, InDirs, maps:remove(Pid, Waiting));
134+
{'DOWN', _MRef, process, Pid, Reason} ->
135+
{_Status, Source} = maps:get(Pid, Waiting),
136+
?ERROR("Failed to get dependencies for ~s~n~p", [Source, Reason]),
137+
?FAIL
138+
end.
139+
108140
%% @doc this function scans all the source files found and looks into
109141
%% all the `InDirs' for deps (other source files, or files that aren't source
110142
%% but still returned by the compiler module) that are related
111143
%% to them.
112-
populate_sources(_G, _Compiler, _InDirs, [], _DepOpts) ->
113-
ok;
114-
populate_sources(G, Compiler, InDirs, [Source|Erls], DepOpts) ->
144+
populate_sources(G, Compiler, InDirs, Sources, DepOpts) ->
145+
populate_sources(G, Compiler, InDirs, Sources, DepOpts, #{}).
146+
147+
populate_sources(G, _Compiler, InDirs, [], _DepOpts, Waiting) ->
148+
finalise_populate_sources(G, InDirs, Waiting);
149+
populate_sources(G, Compiler, InDirs, [Source|Erls], DepOpts, Waiting) ->
115150
case digraph:vertex(G, Source) of
116151
{_, LastUpdated} ->
117152
case filelib:last_modified(Source) of
@@ -120,21 +155,20 @@ populate_sources(G, Compiler, InDirs, [Source|Erls], DepOpts) ->
120155
%% from the graph.
121156
digraph:del_vertex(G, Source),
122157
mark_dirty(G),
123-
populate_sources(G, Compiler, InDirs, Erls, DepOpts);
158+
populate_sources(G, Compiler, InDirs, Erls, DepOpts, Waiting);
124159
LastModified when LastUpdated < LastModified ->
125160
digraph:add_vertex(G, Source, LastModified),
126-
prepopulate_deps(G, Compiler, InDirs, Source, DepOpts, old),
127-
mark_dirty(G);
161+
Worker = prepopulate_deps(Compiler, InDirs, Source, DepOpts, self()),
162+
populate_sources(G, Compiler, InDirs, Erls, DepOpts, Waiting#{Worker => {old, Source}});
128163
_ -> % unchanged
129-
ok
164+
populate_sources(G, Compiler, InDirs, Erls, DepOpts, Waiting)
130165
end;
131166
false ->
132167
LastModified = filelib:last_modified(Source),
133168
digraph:add_vertex(G, Source, LastModified),
134-
prepopulate_deps(G, Compiler, InDirs, Source, DepOpts, new),
135-
mark_dirty(G)
136-
end,
137-
populate_sources(G, Compiler, InDirs, Erls, DepOpts).
169+
Worker = prepopulate_deps(Compiler, InDirs, Source, DepOpts, self()),
170+
populate_sources(G, Compiler, InDirs, Erls, DepOpts, Waiting#{Worker => {new, Source}})
171+
end.
138172

139173
%% @doc Scan all files in the digraph that are seen as dependencies, but are
140174
%% neither source files nor artifacts (i.e. header files that don't produce
@@ -228,19 +262,23 @@ restore_dag(G, File, CritMeta) ->
228262
{ok, Data} ->
229263
%% The CritMeta value is checked and if it doesn't match, we fail
230264
%% the whole restore operation.
231-
#dag{vsn=?DAG_VSN, info={Vs, Es, CritMeta}} = binary_to_term(Data),
232-
[digraph:add_vertex(G, V, LastUpdated) || {V, LastUpdated} <- Vs],
233-
[digraph:add_edge(G, V1, V2, Label) || {_, V1, V2, Label} <- Es],
265+
#dag{vsn=?DAG_VSN, meta = CritMeta, vtab = VTab,
266+
etab = ETab, ntab = NTab} = binary_to_term(Data),
267+
{digraph, VT, ET, NT, false} = G,
268+
true = ets:insert_new(VT, VTab),
269+
true = ets:insert_new(ET, ETab),
270+
true = ets:delete_all_objects(NT),
271+
true = ets:insert(NT, NTab),
234272
ok;
235273
{error, _Err} ->
236274
ok
237275
end.
238276

239277
store_dag(G, File, CritMeta) ->
240278
ok = filelib:ensure_dir(File),
241-
Vs = lists:map(fun(V) -> digraph:vertex(G, V) end, digraph:vertices(G)),
242-
Es = lists:map(fun(E) -> digraph:edge(G, E) end, digraph:edges(G)),
243-
Data = term_to_binary(#dag{info={Vs, Es, CritMeta}}, [{compressed, 2}]),
279+
{digraph, VT, ET, NT, false} = G,
280+
Data = term_to_binary(#dag{meta = CritMeta, vtab = ets:tab2list(VT),
281+
etab = ets:tab2list(ET), ntab = ets:select(NT, [{'_',[],['$_']}])}, [{compressed, 2}]),
244282
file:write_file(File, Data).
245283

246284
%% Drop a file from the digraph if it doesn't exist, and if so,
@@ -285,26 +323,20 @@ maybe_rm_vertex(G, Source) ->
285323
%% mark its timestamp to 0, which means we have no info on it.
286324
%% Source files will be covered at a later point in their own scan, and
287325
%% non-source files are going to be covered by `populate_deps/3'.
288-
prepopulate_deps(G, Compiler, InDirs, Source, DepOpts, Status) ->
289-
SourceDir = filename:dirname(Source),
290-
AbsIncls = case erlang:function_exported(Compiler, dependencies, 4) of
291-
false ->
292-
Compiler:dependencies(Source, SourceDir, InDirs);
293-
true ->
294-
Compiler:dependencies(Source, SourceDir, InDirs, DepOpts)
295-
end,
296-
%% the file hasn't been visited yet; set it to existing, but with
297-
%% a last modified value that's null so it gets updated to something new.
298-
[digraph:add_vertex(G, Src, 0) || Src <- AbsIncls,
299-
digraph:vertex(G, Src) =:= false],
300-
%% drop edges from deps that aren't included!
301-
[digraph:del_edge(G, Edge) || Status == old,
302-
Edge <- digraph:out_edges(G, Source),
303-
{_, _Src, Path, _Label} <- [digraph:edge(G, Edge)],
304-
not lists:member(Path, AbsIncls)],
305-
%% Add the rest
306-
[digraph:add_edge(G, Source, Incl) || Incl <- AbsIncls],
307-
ok.
326+
prepopulate_deps(Compiler, InDirs, Source, DepOpts, Control) ->
327+
{Worker, _MRef} = spawn_monitor(
328+
fun () ->
329+
SourceDir = filename:dirname(Source),
330+
AbsIncls = case erlang:function_exported(Compiler, dependencies, 4) of
331+
false ->
332+
Compiler:dependencies(Source, SourceDir, InDirs);
333+
true ->
334+
Compiler:dependencies(Source, SourceDir, InDirs, DepOpts)
335+
end,
336+
Control ! {deps, self(), AbsIncls}
337+
end
338+
),
339+
Worker.
308340

309341
%% check that a dep file is up to date
310342
refresh_dep(_G, {artifact, _}) ->

src/rebar_compiler_epp.erl

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,18 @@
44
%%% @end
55
-module(rebar_compiler_epp).
66
-export([deps/2, resolve_module/2]).
7+
%% cache (a la code path storage, but for dependencies not in code path)
8+
-export([ensure_started/0, flush/0, resolve_source/2]).
9+
-export([init/1, handle_call/3, handle_cast/2]).
10+
%% remove when OTP 19 support is no longer needed
11+
-export([handle_info/2, terminate/2, code_change/3]).
12+
13+
-behaviour(gen_server).
14+
715
-include_lib("kernel/include/file.hrl").
816

17+
-include("rebar.hrl").
18+
919
%%%%%%%%%%%%%%%%%%%%%%%%%%%
1020
%%% Basic File Handling %%%
1121
%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -37,6 +47,120 @@ resolve_module(Mod, Paths) ->
3747
Path -> {ok, Path}
3848
end.
3949

50+
%%%%%%%%%%%%%%%%%%%%%%%%%%%
51+
%%% Cache for deps %%%
52+
%%%%%%%%%%%%%%%%%%%%%%%%%%%
53+
-spec ensure_started() -> ok.
54+
ensure_started() ->
55+
case whereis(?MODULE) of
56+
undefined ->
57+
case gen_server:start({local, ?MODULE}, ?MODULE, [], []) of
58+
{ok, _Pid} ->
59+
ok;
60+
{error, {already_started, _Pid}} ->
61+
ok
62+
end;
63+
Pid when is_pid(Pid) ->
64+
ok
65+
end.
66+
67+
flush() ->
68+
gen_server:cast(?MODULE, flush).
69+
70+
%% @doc Resolves "Name" erl module to a path, given list of paths to search.
71+
%% Caches result for subsequent requests.
72+
-spec resolve_source(atom() | file:filename_all(), [file:filename_all()]) -> {true, file:filename_all()} | false.
73+
resolve_source(Name, Dirs) when is_atom(Name) ->
74+
gen_server:call(?MODULE, {resolve, atom_to_list(Name) ++ ".erl", Dirs});
75+
resolve_source(Name, Dirs) when is_list(Name) ->
76+
gen_server:call(?MODULE, {resolve, Name, Dirs}).
77+
78+
-record(state, {
79+
%% filesystem cache, denormalised
80+
fs = #{} :: #{file:filename_all() => [file:filename_all()]},
81+
%% map of module name => abs path
82+
resolved = #{} :: #{file:filename_all() => file:filename_all()}
83+
}).
84+
85+
init([]) ->
86+
{ok, #state{}}.
87+
88+
handle_call({resolve, Name, Dirs}, _From, #state{fs = Fs, resolved = Res} = State) ->
89+
case maps:find(Name, Res) of
90+
{ok, Found} ->
91+
{reply, Found, State};
92+
error ->
93+
{Resolved, NewFs} = resolve(Name, Fs, Dirs),
94+
{reply, Resolved, State#state{resolved = Res#{Name => Resolved}, fs = NewFs}}
95+
end.
96+
97+
handle_cast(flush, _State) ->
98+
{noreply, #state{}}.
99+
100+
resolve(_Name, Fs, []) ->
101+
{false, Fs};
102+
resolve(Name, Fs, [Dir | Tail]) ->
103+
{NewFs, Files} = list_directory(Dir, Fs),
104+
case lists:member(Name, Files) of
105+
true ->
106+
{{true, filename:join(Dir, Name)}, NewFs};
107+
false ->
108+
resolve(Name, NewFs, Tail)
109+
end.
110+
111+
%% list_directory/2 caches files in the directory and all subdirectories,
112+
%% to support the behaviour of looking for source files in
113+
%% subdirectories of src/* folder.
114+
%% This may introduce weird dependencies for cases when CT
115+
%% test cases contain test data with files named the same
116+
%% as requested behaviour/parse_transforms, but let's hope
117+
%% it won't happen for many projects. If it does, in fact,
118+
%% it won't cause any damage, just extra unexpected recompiles.
119+
list_directory(Dir, Cache) ->
120+
case maps:find(Dir, Cache) of
121+
{ok, Files} ->
122+
{Cache, Files};
123+
error ->
124+
case file:list_dir(Dir) of
125+
{ok, DirFiles} ->
126+
%% create a full list of *.erl files under Dir.
127+
{NewFs, Files} = lists:foldl(
128+
fun (File, {DirCache, Files} = Acc) ->
129+
%% recurse into subdirs
130+
FullName = filename:join(Dir, File),
131+
case filelib:is_dir(FullName) of
132+
true ->
133+
{UpdFs, MoreFiles} = list_directory(FullName, DirCache),
134+
{UpdFs, MoreFiles ++ Files};
135+
false ->
136+
%% ignore all but *.erl files
137+
case filename:extension(File) =:= ".erl" of
138+
true ->
139+
{DirCache, [File | Files]};
140+
false ->
141+
Acc
142+
end
143+
end
144+
end,
145+
{Cache, []}, DirFiles),
146+
{NewFs#{Dir => Files}, Files};
147+
{error, Reason} ->
148+
?DEBUG("Failed to list ~s, ~p", [Dir, Reason]),
149+
{Cache, []}
150+
end
151+
end.
152+
153+
%%%%%%%%%%%%%%%
154+
%%% OTP 19 %%%
155+
handle_info(_Request, State) ->
156+
{noreply, State}.
157+
158+
terminate(_Reason, _State) ->
159+
ok.
160+
161+
code_change(_OldVsn, State, _Extra) ->
162+
{ok, State}.
163+
40164
%%%%%%%%%%%%%%%
41165
%%% PRIVATE %%%
42166
%%%%%%%%%%%%%%%

src/rebar_compiler_erl.erl

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ dependencies(Source, SourceDir, Dirs) ->
100100
end.
101101

102102
dependencies(Source, _SourceDir, Dirs, DepOpts) ->
103+
rebar_compiler_epp:ensure_started(),
103104
OptPTrans = proplists:get_value(parse_transforms, DepOpts, []),
104105
try rebar_compiler_epp:deps(Source, DepOpts) of
105106
#{include := AbsIncls,
@@ -110,9 +111,9 @@ dependencies(Source, _SourceDir, Dirs, DepOpts) ->
110111
%% TODO: check for core transforms?
111112
{_MissIncl, _MissInclLib} =/= {[],[]} andalso
112113
?DEBUG("Missing: ~p", [{_MissIncl, _MissInclLib}]),
113-
expand_file_names([module_to_erl(Mod) || Mod <- OptPTrans ++ PTrans], Dirs) ++
114-
expand_file_names([module_to_erl(Mod) || Mod <- Behaviours], Dirs) ++
115-
AbsIncls
114+
lists:filtermap(
115+
fun (Mod) -> rebar_compiler_epp:resolve_source(Mod, Dirs) end,
116+
OptPTrans ++ PTrans ++ Behaviours) ++ AbsIncls
116117
catch
117118
error:{badmatch, {error, Reason}} ->
118119
case file:format_error(Reason) of
@@ -141,6 +142,7 @@ compile(Source, [{_, OutDir}], Config, ErlOpts) ->
141142
end.
142143

143144
compile_and_track(Source, [{Ext, OutDir}], Config, ErlOpts) ->
145+
rebar_compiler_epp:flush(),
144146
BuildOpts = [{outdir, OutDir} | ErlOpts],
145147
Target = target_base(OutDir, Source) ++ Ext,
146148
AllOpts = case erlang:function_exported(compile, env_compiler_options, 0) of

0 commit comments

Comments
 (0)