Skip to content

Commit 63555da

Browse files
lukemaurermshinwell
authored andcommitted
Attempt to shrink the heap before calling the assembler (#429)
* Attempt to shrink the heap before calling the assembler We have a fair amount of global state that's never collected, which is causing trouble in large parallel builds, particularly when large assembly files are generated, causing `as` to need a lot of memory itself. This patch simply clears out some data once it's no longer needed and, when memory use is high, calls `Gc.compact` to try and release some memory back to the OS. This has been observed to reduce the heap size by some 640MB (for an unusually large generated .ml file). This PR still leaves some relatively low-hanging fruit: after to_cmm, we can marshal the .cmx eagerly and clear out the exported info. However, it's not obvious how to handle this cleanly, and anyway one can use `-Oclassic` to avoid producing a large .cmx to begin with.
1 parent c1babdf commit 63555da

29 files changed

+118
-35
lines changed

backend/amd64/emit.mlp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1204,13 +1204,17 @@ let data l =
12041204

12051205
(* Beginning / end of an assembly file *)
12061206

1207-
let begin_assembly() =
1207+
let reset_all () =
12081208
X86_proc.reset_asm_code ();
1209+
Emitaux.reset ();
12091210
reset_debug_info(); (* PR#5603 *)
12101211
reset_imp_table();
12111212
reset_probes ();
12121213
float_constants := [];
1213-
all_functions := [];
1214+
all_functions := []
1215+
1216+
let begin_assembly() =
1217+
reset_all ();
12141218
if system = S_win64 then begin
12151219
D.extrn "caml_call_gc" NEAR;
12161220
D.extrn "caml_c_call" NEAR;
@@ -1592,4 +1596,5 @@ let end_assembly() =
15921596
else
15931597
None
15941598
in
1595-
X86_proc.generate_code asm
1599+
X86_proc.generate_code asm;
1600+
reset_all ()

backend/asmgen.ml

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,11 @@ let compile_unit ~output_prefix ~asm_filename ~keep_asm ~obj_filename gen =
285285
~exceptionally:(fun () ->
286286
if create_asm && not keep_asm then remove_file asm_filename);
287287
if should_emit () then begin
288+
Emitaux.reduce_heap_size ~reset:(fun () ->
289+
reset ();
290+
Typemod.reset ();
291+
Emitaux.reset ();
292+
Reg.reset ());
288293
let assemble_result =
289294
Profile.record "assemble"
290295
(Proc.assemble_file asm_filename) obj_filename
@@ -344,9 +349,9 @@ let compile_implementation ?toplevel ~backend ~filename ~prefixname ~middle_end
344349
in
345350
end_gen_implementation ?toplevel ~ppf_dump clambda_with_constants)
346351

347-
let compile_implementation_flambda2 ?toplevel ~filename ~prefixname
348-
~size:module_block_size_in_words ~module_ident ~module_initializer
349-
~flambda2 ~ppf_dump ~required_globals () =
352+
let compile_implementation_flambda2 ?toplevel ?(keep_symbol_tables=true)
353+
~filename ~prefixname ~size:module_block_size_in_words ~module_ident
354+
~module_initializer ~flambda2 ~ppf_dump ~required_globals () =
350355
compile_unit ~output_prefix:prefixname
351356
~asm_filename:(asm_filename prefixname) ~keep_asm:!keep_asm_file
352357
~obj_filename:(prefixname ^ ext_obj)
@@ -355,6 +360,7 @@ let compile_implementation_flambda2 ?toplevel ~filename ~prefixname
355360
let cmm_phrases =
356361
flambda2 ~ppf_dump ~prefixname ~filename ~module_ident
357362
~module_block_size_in_words ~module_initializer
363+
~keep_symbol_tables
358364
in
359365
end_gen_implementation0 ?toplevel ~ppf_dump (fun () -> cmm_phrases))
360366

backend/asmgen.mli

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ val compile_implementation
4040
Cmmgen pass. Instead it emits Cmm directly. *)
4141
val compile_implementation_flambda2
4242
: ?toplevel:(string -> bool)
43+
-> ?keep_symbol_tables:bool
4344
-> filename:string
4445
-> prefixname:string
4546
-> size:int
@@ -52,6 +53,7 @@ val compile_implementation_flambda2
5253
module_ident:Ident.t ->
5354
module_block_size_in_words:int ->
5455
module_initializer:Lambda.lambda ->
56+
keep_symbol_tables:bool ->
5557
Cmm.phrase list)
5658
-> ppf_dump:Format.formatter
5759
-> required_globals:Ident.Set.t

backend/asmlink.ml

Lines changed: 1 addition & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -396,20 +396,7 @@ let link ~ppf_dump objfiles output_name =
396396
~asm_filename:startup ~keep_asm:!Clflags.keep_startup_file
397397
~obj_filename:startup_obj
398398
(fun () -> make_startup_file ~ppf_dump units_tolink);
399-
(* Clear all state and compact before calling the linker, because the linker
400-
can use a lot of memory, and this reduces the peak memory usage by freeing
401-
most of the memory from this process before the linker starts using memory.
402-
403-
On a link where this frees up around 1.1GB of memory this takes around 0.6s. We
404-
only take this time on large links where the number of parallel linking jobs is
405-
likely to be constrained by total system memory. *)
406-
let _minor, _promoted, major_words = Gc.counters () in
407-
(* Uses [major_words] because it doesn't require a heap traversal to compute and
408-
for this workload a majority of major words are live at this point. *)
409-
if major_words > 500_000_000.0 /. 8.0 then
410-
Profile.record_call "asmlink_compact" (fun () ->
411-
reset ();
412-
Gc.compact ());
399+
Emitaux.reduce_heap_size ~reset:(fun () -> reset ());
413400
Misc.try_finally
414401
(fun () ->
415402
call_linker (List.map object_file_name objfiles)

backend/asmpackager.ml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ let make_package_object ~ppf_dump members targetobj targetname coercion
114114
~flambda2
115115
~ppf_dump
116116
~required_globals:required_globals
117+
~keep_symbol_tables:false
117118
()
118119
end else begin
119120
let program, middle_end =

backend/asmpackager.mli

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ val package_files
2929
module_ident:Ident.t ->
3030
module_block_size_in_words:int ->
3131
module_initializer:Lambda.lambda ->
32+
keep_symbol_tables:bool ->
3233
Cmm.phrase list)
3334
-> unit
3435

backend/emitaux.ml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -375,3 +375,13 @@ let reset () =
375375

376376
let binary_backend_available = ref false
377377
let create_asm_file = ref true
378+
379+
let reduce_heap_size ~reset =
380+
let _minor, _promoted, major_words = Gc.counters () in
381+
(* Uses [major_words] because it doesn't require a heap traversal to compute and
382+
for this workload a majority of major words are live at this point. *)
383+
if major_words > 500_000_000.0 /. 8.0 then begin
384+
Profile.record_call "compact" (fun () ->
385+
reset ();
386+
Gc.compact ())
387+
end

backend/emitaux.mli

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,3 +81,18 @@ val binary_backend_available: bool ref
8181

8282
val create_asm_file: bool ref
8383
(** Are we actually generating the textual assembly file? *)
84+
85+
(** Clear global state and compact the heap, so that an external program
86+
(such as the assembler or linker) may have more memory available to it.
87+
88+
When this frees up around 1.1GB of memory, it takes around 0.6s. We only
89+
take this time when the job is large enough that we're worried that we'll
90+
either run out of memory or constrain the number of parallel jobs. We
91+
heuristically measure how big the job is by how much heap we're using
92+
ourselves.
93+
94+
The [reset] parameter will be called before [Gc.compact] if we go ahead
95+
with the compaction. It should clear as much as possible from the global
96+
state, since the fewer live words there are after GC, the smaller the new
97+
heap can be. *)
98+
val reduce_heap_size : reset:(unit -> unit) -> unit

driver/optcompile.ml

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ let flambda_and_flambda2 i typed ~compile_implementation =
5151
~required_globals;
5252
Compilenv.save_unit_info (cmx i)))
5353

54-
let flambda2_ i ~flambda2 typed =
54+
let flambda2_ i ~flambda2 ~keep_symbol_tables typed =
5555
flambda_and_flambda2 i typed
5656
~compile_implementation:(fun ~module_ident ~main_module_block_size ~code
5757
~required_globals ->
@@ -64,6 +64,7 @@ let flambda2_ i ~flambda2 typed =
6464
~flambda2
6565
~ppf_dump:i.ppf_dump
6666
~required_globals
67+
~keep_symbol_tables
6768
())
6869

6970
let flambda i backend typed =
@@ -112,13 +113,14 @@ let emit i =
112113
Compilenv.reset ?packname:!Clflags.for_package i.module_name;
113114
Asmgen.compile_implementation_linear i.output_prefix ~progname:i.source_file
114115

115-
let implementation ~backend ~flambda2 ~start_from ~source_file ~output_prefix =
116+
let implementation ~backend ~flambda2 ~start_from ~source_file ~output_prefix
117+
~keep_symbol_tables =
116118
let backend info typed =
117119
Compilenv.reset ?packname:!Clflags.for_package info.module_name;
118120
if Config.flambda
119121
then flambda info backend typed
120122
else if Config.flambda2
121-
then flambda2_ info ~flambda2 typed
123+
then flambda2_ info ~flambda2 ~keep_symbol_tables typed
122124
else clambda info backend typed
123125
in
124126
with_info ~source_file ~output_prefix ~dump_ext:"cmx" @@ fun info ->

driver/optcompile.mli

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,11 @@ val implementation
2626
module_ident:Ident.t ->
2727
module_block_size_in_words:int ->
2828
module_initializer:Lambda.lambda ->
29+
keep_symbol_tables:bool ->
2930
Cmm.phrase list)
3031
-> start_from:Clflags.Compiler_pass.t
31-
-> source_file:string -> output_prefix:string -> unit
32+
-> source_file:string -> output_prefix:string -> keep_symbol_tables:bool
33+
-> unit
3234

3335
(** {2 Internal functions} **)
3436

driver/optmaindriver.mli

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,5 +28,6 @@ val main
2828
module_ident:Ident.t ->
2929
module_block_size_in_words:int ->
3030
module_initializer:Lambda.lambda ->
31+
keep_symbol_tables:bool ->
3132
Cmm.phrase list)
3233
-> int

middle_end/compilenv.ml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,10 @@ let global_infos_table =
3636
let export_infos_table =
3737
(Hashtbl.create 10 : (string, Export_info.t) Hashtbl.t)
3838

39+
let reset_info_tables () =
40+
Hashtbl.reset global_infos_table;
41+
Hashtbl.reset export_infos_table
42+
3943
let imported_sets_of_closures_table =
4044
(Set_of_closures_id.Tbl.create 10
4145
: Simple_value_approx.function_declarations option

middle_end/compilenv.mli

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ val reset: ?packname:string -> string -> unit
3333
(* Reset the environment and record the name of the unit being
3434
compiled (arg). Optional argument is [-for-pack] prefix. *)
3535

36+
val reset_info_tables: unit -> unit
37+
3638
val unit_id_from_name: string -> Ident.t
3739
(* flambda-only *)
3840

middle_end/flambda2/flambda2.ml

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ let output_flexpect ~ml_filename ~raw_flambda:old_unit new_unit =
144144
Format.pp_print_flush ppf ())
145145

146146
let lambda_to_cmm ~ppf_dump:ppf ~prefixname ~filename ~module_ident
147-
~module_block_size_in_words ~module_initializer =
147+
~module_block_size_in_words ~module_initializer ~keep_symbol_tables =
148148
Misc.Color.setup (Flambda_features.colour ());
149149
(* When the float array optimisation is enabled, the length of an array needs
150150
to be computed differently according to the array kind, in the case where
@@ -203,7 +203,17 @@ let lambda_to_cmm ~ppf_dump:ppf ~prefixname ~filename ~module_ident
203203
Flambda2_identifiers.Code_id.print (Code.code_id code)
204204
Cost_metrics.print size)
205205
end;
206-
Flambda2_to_cmm.To_cmm.unit ~make_symbol:Compilenv.make_symbol flambda cmx
207-
~all_code
206+
let cmm =
207+
Flambda2_to_cmm.To_cmm.unit ~make_symbol:Compilenv.make_symbol flambda cmx
208+
~all_code
209+
in
210+
if not keep_symbol_tables
211+
then begin
212+
Compilenv.reset_info_tables ();
213+
Flambda2_identifiers.Code_id.reset ();
214+
Flambda2_identifiers.Continuation.reset ();
215+
Flambda2_identifiers.Reg_width_things.reset ()
216+
end;
217+
cmm
208218
in
209219
Profile.record_call "flambda2" run

middle_end/flambda2/flambda2.mli

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ val lambda_to_cmm :
2626
module_ident:Ident.t ->
2727
module_block_size_in_words:int ->
2828
module_initializer:Lambda.lambda ->
29+
keep_symbol_tables:bool ->
2930
Cmm.phrase list
3031

3132
val symbol_for_global :

middle_end/flambda2/identifiers/code_id.ml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,8 @@ let grand_table_of_code_ids = ref (Table.create ())
6767

6868
let initialise () = grand_table_of_code_ids := Table.create ()
6969

70+
let reset () = initialise ()
71+
7072
let find_data t = Table.find !grand_table_of_code_ids t
7173

7274
let get_compilation_unit t = (find_data t).compilation_unit

middle_end/flambda2/identifiers/code_id.mli

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ module Lmap : Lmap.S with type key = t
2424

2525
val initialise : unit -> unit
2626

27+
val reset : unit -> unit
28+
2729
val create : name:string -> Compilation_unit.t -> t
2830

2931
val get_compilation_unit : t -> Compilation_unit.t

middle_end/flambda2/identifiers/continuation.ml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,8 @@ let grand_table_of_continuations = ref (Table.create ())
137137

138138
let initialise () = grand_table_of_continuations := Table.create ()
139139

140+
let reset () = initialise ()
141+
140142
(* CR mshinwell: Document why this uses [next_raise_count]. Does it need to? It
141143
would be better if it didn't. *)
142144
let create ?sort ?name () : t =

middle_end/flambda2/identifiers/continuation.mli

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,3 +58,5 @@ module With_args : sig
5858
end
5959

6060
val initialise : unit -> unit
61+
62+
val reset : unit -> unit

middle_end/flambda2/identifiers/reg_width_things.ml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -730,3 +730,5 @@ let initialise () =
730730
Variable.initialise ();
731731
Symbol.initialise ();
732732
Simple.initialise ()
733+
734+
let reset () = initialise ()

middle_end/flambda2/identifiers/reg_width_things.mli

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,3 +198,5 @@ module Simple : sig
198198
end
199199

200200
val initialise : unit -> unit
201+
202+
val reset : unit -> unit

ocaml/driver/compenv.ml

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -621,11 +621,13 @@ let c_object_of_filename name =
621621
Filename.chop_suffix (Filename.basename name) ".c" ^ Config.ext_obj
622622

623623
let process_action
624-
(ppf, implementation, interface, ocaml_mod_ext, ocaml_lib_ext) action =
624+
(ppf, implementation, interface, ocaml_mod_ext, ocaml_lib_ext) action
625+
~keep_symbol_tables =
625626
let impl ~start_from name =
626627
readenv ppf (Before_compile name);
627628
let opref = output_prefix name in
628-
implementation ~start_from ~source_file:name ~output_prefix:opref;
629+
implementation ~start_from ~source_file:name ~output_prefix:opref
630+
~keep_symbol_tables;
629631
objfiles := (opref ^ ocaml_mod_ext) :: !objfiles
630632
in
631633
match action with
@@ -709,7 +711,14 @@ let process_deferred_actions env =
709711
| ProcessOtherFile name -> Filename.check_suffix name ".cmxa"
710712
| _ -> false) !deferred_actions then
711713
fatal "Option -a cannot be used with .cmxa input files.";
712-
List.iter (process_action env) (List.rev !deferred_actions);
714+
let compiling_multiple_impls =
715+
List.length (List.filter (function
716+
| ProcessImplementation _ -> true
717+
| _ -> false) !deferred_actions) > 1
718+
in
719+
let keep_symbol_tables = compiling_multiple_impls in
720+
List.iter (process_action env ~keep_symbol_tables)
721+
(List.rev !deferred_actions);
713722
output_name := final_output_name;
714723
stop_early :=
715724
!compile_only ||

ocaml/driver/compenv.mli

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,8 @@ val intf : string -> unit
8282
val process_deferred_actions :
8383
Format.formatter *
8484
(start_from:Clflags.Compiler_pass.t ->
85-
source_file:string -> output_prefix:string -> unit) *
85+
source_file:string -> output_prefix:string ->
86+
keep_symbol_tables:bool -> unit) *
8687
(* compile implementation *)
8788
(source_file:string -> output_prefix:string -> unit) *
8889
(* compile interface *)

ocaml/driver/compile.ml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,8 @@ let emit_bytecode i (bytecode, required_globals) =
5757
(Emitcode.to_file oc i.module_name cmofile ~required_globals);
5858
)
5959

60-
let implementation ~start_from ~source_file ~output_prefix =
60+
let implementation ~start_from ~source_file ~output_prefix
61+
~keep_symbol_tables:_ =
6162
let backend info typed =
6263
let bytecode = to_bytecode info typed in
6364
emit_bytecode info bytecode

ocaml/driver/compile.mli

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ val interface:
1919
source_file:string -> output_prefix:string -> unit
2020
val implementation:
2121
start_from:Clflags.Compiler_pass.t ->
22-
source_file:string -> output_prefix:string -> unit
22+
source_file:string -> output_prefix:string -> keep_symbol_tables:bool -> unit
2323

2424
(** {2 Internal functions} **)
2525

ocaml/driver/optcompile.ml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,8 @@ let emit i =
8787
Compilenv.reset ?packname:!Clflags.for_package i.module_name;
8888
Asmgen.compile_implementation_linear i.output_prefix ~progname:i.source_file
8989

90-
let implementation ~backend ~start_from ~source_file ~output_prefix =
90+
let implementation ~backend ~start_from ~source_file
91+
~output_prefix ~keep_symbol_tables:_ =
9192
let backend info typed =
9293
Compilenv.reset ?packname:!Clflags.for_package info.module_name;
9394
if Config.flambda

ocaml/driver/optcompile.mli

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@ val interface: source_file:string -> output_prefix:string -> unit
2020
val implementation:
2121
backend:(module Backend_intf.S)
2222
-> start_from:Clflags.Compiler_pass.t
23-
-> source_file:string -> output_prefix:string -> unit
23+
-> source_file:string -> output_prefix:string -> keep_symbol_tables:bool
24+
-> unit
2425

2526
(** {2 Internal functions} **)
2627

ocaml/typing/typemod.ml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2943,3 +2943,8 @@ let () =
29432943
| _ ->
29442944
None
29452945
)
2946+
2947+
let reset () =
2948+
Env.reset_cache ();
2949+
Envaux.reset_cache ();
2950+
Typetexp.reset_type_variables ()

0 commit comments

Comments
 (0)