Skip to content

Commit 9f2ce2a

Browse files
committed
Initial setup
1 parent 8a03412 commit 9f2ce2a

9 files changed

+309
-2
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,3 +27,5 @@ setup.log
2727

2828
# Local OPAM switch
2929
_opam/
30+
31+
*.swp

README.md

Lines changed: 58 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,58 @@
1-
# dynamic-gc
2-
Dynamic tuning for the OCaml garbage collector
1+
# Dynamic GC
2+
Dynamic tuning for the OCaml garbage collector.
3+
4+
This utility allows you to instruct the OCaml garbage collector to become more
5+
aggressive as the size of the major heap grows. This can be useful if you would
6+
prefer your application to execute quickly with few resources dedicated to
7+
garbage collection when memory usage is low, but would like to change that
8+
trade-off when memory usage is high.
9+
10+
Specifically, this utility adjusts the `space_overhead` value at the end of each
11+
major collection using the [`Gc`](https://ocaml.org/manual/5.3/api/Gc.html)
12+
module.
13+
14+
## Usage
15+
16+
Install via `opam install dynamic_gc`.
17+
18+
Use according to
19+
[`DynamicGc.mli`](https://github.com/semgrep/dynamic-gc/blob/main/lib/DynamicGc.mli).
20+
21+
For example, you might put the following at or near the entry point to your
22+
program. It would allow `space_overhead` to range between 20 and 40, such that
23+
it is 40 when the size of the major heap is 2 GB or less, 20 when the size of
24+
the major heap is 4 GB or more, and linearly interpolated in between.
25+
26+
```ocaml
27+
DynamicGc.(setup_dynamic_tuning
28+
{
29+
min_space_overhead = 20;
30+
max_space_overhead = 40;
31+
heap_start_worrying_mb = 2_048;
32+
heap_really_worry_mb = 4_096;
33+
});
34+
```
35+
36+
## Caveats
37+
38+
This tunes the garbage collector based on the size of the major heap, not the
39+
amount of memory that is currently live. Because OCaml 5 does not do compaction
40+
by default, this means that if your program uses a lot of memory and then
41+
releases it, the garbage collector may still be set to an aggressive setting
42+
because the major heap is still large. You may need to manually call
43+
[`Gc.compact ()`](https://ocaml.org/manual/5.3/api/Gc.html#VALcompact) in order
44+
to free memory and reduce the size of the major heap, thereby allowing this
45+
utility to increase the value of `space_overhead`.
46+
47+
This utility relies on garbage collector alarms. This functionality is [broken
48+
in OCaml
49+
5.2.0](https://discuss.ocaml.org/t/changes-in-handling-of-gc-parameters-and-alarms-in-5-2-0/14986).
50+
If you are on 5.2.0, upgrade to at least 5.2.1 to use this utility.
51+
52+
## Contributing
53+
54+
To build: `dune build`
55+
56+
To test: `dune test`
57+
58+
To use your local copy in another project: `opam pin .`

dune-project

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
(lang dune 3.17)
2+
3+
(name dynamic_gc)
4+
5+
(generate_opam_files true)
6+
7+
(source
8+
(github semgrep/dynamic-gc))
9+
10+
(authors "Nat Mote <nat@semgrep.com>")
11+
12+
(maintainers "Nat Mote <nat@semgrep.com>")
13+
14+
(license MIT)
15+
16+
(documentation https://github.com/semgrep/dynamic-gc)
17+
18+
(package
19+
(name dynamic_gc)
20+
(synopsis "Dynamically adjust GC behavior based on memory usage")
21+
(description "A utility to dynamically adjust garbage collector behavior based
22+
on memory usage, allowing your application to prioritize improved
23+
run time when memory usage is low, but prioritize decreased memory
24+
usage when memory usage is high.")
25+
(depends ocaml)
26+
(tags
27+
("garbage collector" "gc")))

dynamic_gc.opam

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# This file is generated by dune, edit dune-project instead
2+
opam-version: "2.0"
3+
synopsis: "Dynamically adjust GC behavior based on memory usage"
4+
description: """
5+
A utility to dynamically adjust garbage collector behavior based
6+
on memory usage, allowing your application to prioritize improved
7+
run time when memory usage is low, but prioritize decreased memory
8+
usage when memory usage is high."""
9+
maintainer: ["Nat Mote <nat@semgrep.com>"]
10+
authors: ["Nat Mote <nat@semgrep.com>"]
11+
license: "MIT"
12+
tags: ["garbage collector" "gc"]
13+
homepage: "https://github.com/semgrep/dynamic-gc"
14+
doc: "https://github.com/semgrep/dynamic-gc"
15+
bug-reports: "https://github.com/semgrep/dynamic-gc/issues"
16+
depends: [
17+
"dune" {>= "3.17"}
18+
"ocaml"
19+
"odoc" {with-doc}
20+
]
21+
build: [
22+
["dune" "subst"] {dev}
23+
[
24+
"dune"
25+
"build"
26+
"-p"
27+
name
28+
"-j"
29+
jobs
30+
"@install"
31+
"@runtest" {with-test}
32+
"@doc" {with-doc}
33+
]
34+
]
35+
dev-repo: "git+https://github.com/semgrep/dynamic-gc.git"

lib/DynamicGc.ml

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
(*
2+
* Copyright (C) 2024-2025 Semgrep, Inc.
3+
*
4+
* This source code is licensed under the MIT license found in the LICENSE file
5+
* in the root directory of this source tree.
6+
*)
7+
8+
(* Handle of the GC alarm installed by [setup_dynamic_tuning], or [None] when
 * no alarm is installed. [stop_dynamic_tuning] resets it to [None]. *)
let alarm : Gc.alarm option ref = ref None
9+
10+
(* Tuning parameters. space_overhead is kept between the two *_space_overhead
 * bounds, and the two heap_*_mb thresholds (major heap size in MB) decide
 * where between those bounds it sits. *)
type config = {
  (* Smallest space_overhead that will be applied. *)
  min_space_overhead : int;
  (* Largest space_overhead that will be applied. *)
  max_space_overhead : int;
  (* Heap size at which space_overhead starts dropping from the maximum. *)
  heap_start_worrying_mb : int;
  (* Heap size at which space_overhead reaches the minimum. *)
  heap_really_worry_mb : int;
}
16+
17+
(* Maps a major-heap size (in MB) to the space_overhead to apply for [config].
 *
 * For a config accepted by [setup_dynamic_tuning]: sizes up to
 * heap_start_worrying_mb yield max_space_overhead, sizes from
 * heap_really_worry_mb upwards yield min_space_overhead, and sizes in between
 * fall linearly from one to the other, rounded to the nearest integer. When
 * the two thresholds coincide there is no ramp: sizes strictly above the
 * threshold yield min_space_overhead and all others max_space_overhead. *)
let space_overhead_of_heap_size config heap_size_mb =
  let start_mb = config.heap_start_worrying_mb in
  let really_mb = config.heap_really_worry_mb in
  if start_mb = really_mb then
    (* A zero-width ramp would divide by zero below, so treat it as a step. *)
    (if heap_size_mb > start_mb then config.min_space_overhead
     else config.max_space_overhead)
  else
    let lowest = Float.of_int config.min_space_overhead in
    let highest = Float.of_int config.max_space_overhead in
    let ramp_start = Float.of_int start_mb in
    let ramp_end = Float.of_int really_mb in
    (* Straight line through (ramp_start, highest) and (ramp_end, lowest). *)
    let slope = (lowest -. highest) /. (ramp_end -. ramp_start) in
    let offset = highest -. (slope *. ramp_start) in
    let on_line = (slope *. Float.of_int heap_size_mb) +. offset in
    (* Clamp so that sizes outside the ramp stay within the two bounds. *)
    let clamped = max lowest (min on_line highest) in
    Float.to_int (Float.round clamped)
41+
42+
(* GC alarm callback: reads the current major heap size, converts it to MB,
 * and sets space_overhead to the value [config] prescribes for that size.
 * All other GC parameters are carried over unchanged from [Gc.get ()]. *)
let handle_alarm config () =
  let { Gc.heap_words; _ } = Gc.quick_stat () in
  let word_size_bytes = Sys.word_size / 8 in
  (* Divide by words-per-MB instead of first multiplying out to bytes. The
   * quotient is the same (1 MB is a whole number of words), but the
   * intermediate byte count could overflow [int] on 32-bit targets once the
   * heap reaches 1 GiB. *)
  let words_per_mb = 1_024 * 1_024 / word_size_bytes in
  let heap_size_mb = heap_words / words_per_mb in
  let space_overhead = space_overhead_of_heap_size config heap_size_mb in
  Gc.set { (Gc.get ()) with space_overhead }
48+
49+
(* Validates [config], applies the corresponding space_overhead immediately,
 * and registers a GC alarm that recomputes it from then on.
 *
 * Raises [Failure] when tuning is already active, when min_space_overhead is
 * greater than max_space_overhead, or when heap_start_worrying_mb is greater
 * than heap_really_worry_mb. *)
let setup_dynamic_tuning config =
  if Option.is_some !alarm then
    failwith "DynamicGc.setup_dynamic_tuning called multiple times.";
  (* Equal bounds are accepted; they simply pin space_overhead to one value. *)
  if config.min_space_overhead > config.max_space_overhead then
    failwith
      "DynamicGc.setup_dynamic_tuning called with min_space_overhead greater \
       than max_space_overhead";
  (* Equal thresholds are accepted and give a sharp cutover from
   * max_space_overhead to min_space_overhead. *)
  if config.heap_start_worrying_mb > config.heap_really_worry_mb then
    failwith
      "DynamicGc.setup_dynamic_tuning called with nonsensical heap size \
       arguments";
  (* Apply once up front so the setting takes effect without first waiting
   * for a major collection to complete. *)
  handle_alarm config ();
  alarm := Some (Gc.create_alarm (handle_alarm config))
65+
66+
(* Removes the GC alarm installed by [setup_dynamic_tuning] and marks tuning
 * as inactive. space_overhead is left at whatever value was last applied.
 *
 * Raises [Failure] when no alarm is currently installed. *)
let stop_dynamic_tuning () =
  match !alarm with
  | None ->
      failwith
        "DynamicGc.stop_dynamic_tuning called when dynamic tuning was already \
         stopped."
  | Some installed ->
      Gc.delete_alarm installed;
      alarm := None
75+
76+
(* Re-exports the otherwise private size-to-space_overhead mapping so that the
 * test suite can exercise it directly. *)
module ForTestingDoNotUse = struct
  let space_overhead_of_heap_size config heap_size_mb =
    space_overhead_of_heap_size config heap_size_mb
end

lib/DynamicGc.mli

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
(*
2+
* Copyright (C) 2024-2025 Semgrep, Inc.
3+
*
4+
* This source code is licensed under the MIT license found in the LICENSE file
5+
* in the root directory of this source tree.
6+
*)
7+
8+
(* Utilities for interacting with the OCaml garbage collector. See
9+
* https://ocaml.org/manual/5.2/api/Gc.html for context. Without familiarity
10+
* with the Gc module, this module is not likely to make a whole lot of sense.
11+
* *)
12+
13+
(* Settings for dynamic GC tuning. While the major heap is small the GC is kept
 * relaxed, favouring run time over memory; as the major heap grows the GC is
 * made progressively more aggressive, favouring memory over run time. *)
type config = {
  (* Lower bound on the space_overhead values that will be applied. This is
   * the MOST aggressive GC setting. *)
  min_space_overhead : int;
  (* Upper bound on the space_overhead values that will be applied. This is
   * the LEAST aggressive GC setting. *)
  max_space_overhead : int;
  (* Major heap size, in MB, at which space_overhead starts to be lowered. Up
   * to this size space_overhead is always max_space_overhead; beyond it the
   * value is interpolated linearly, reaching min_space_overhead at
   * heap_really_worry_mb. *)
  heap_start_worrying_mb : int;
  (* Major heap size, in MB, from which the most aggressive setting
   * (min_space_overhead) is applied, trading run time for lower memory
   * consumption. *)
  heap_really_worry_mb : int;
}
34+
35+
(* Begins dynamic GC tuning with the given config: the desired space_overhead
 * is applied straight away and then recomputed after each major collection.
 *
 * Precondition: this mutates global state, so it must not be called again
 * until `stop_dynamic_tuning` has been called in between.
 *
 * Raises Failure if that precondition is violated, if min_space_overhead is
 * greater than max_space_overhead, or if heap_start_worrying_mb is greater
 * than heap_really_worry_mb. *)
val setup_dynamic_tuning : config -> unit
41+
42+
(* Ends the dynamic GC tuning that is currently in progress. space_overhead is
 * NOT restored to the value it had before tuning started.
 *
 * Precondition: dynamic tuning must currently be in progress; otherwise this
 * raises Failure. *)
val stop_dynamic_tuning : unit -> unit
47+
48+
(* Internals exposed for the test suite only. *)
module ForTestingDoNotUse : sig
  (* Given a config and a major heap size in MB, returns the space_overhead
   * that would be applied. *)
  val space_overhead_of_heap_size : config -> int -> int
end

lib/dune

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
(library
2+
(public_name dynamic_gc)
3+
(name dynamic_gc)
4+
(wrapped false))

test/dune

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
(test
2+
(name test_dynamic_gc)
3+
(libraries dynamic_gc))

test/test_dynamic_gc.ml

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
(*
2+
* Copyright (C) 2024-2025 Semgrep, Inc.
3+
*
4+
* This source code is licensed under the MIT license found in the LICENSE file
5+
* in the root directory of this source tree.
6+
*)
7+
8+
(* If this grows any more complex, pull in Alcotest and Testo *)
9+
(* Does nothing when [actual] equals [expected]; otherwise fails with a message
 * that names the case and shows both values. *)
let check (expected : int) (actual : int) (case_name : string) =
  if expected = actual then ()
  else
    failwith
      (Printf.sprintf "%s: expected %d, got %d" case_name expected actual)
12+
13+
(* One test case: (name, config, heap size in MB, expected space overhead). *)
type case = string * DynamicGc.config * int * int
15+
16+
(* Config with two distinct thresholds, so there is a real interpolation range
 * between 1_024 MB and 4_096 MB. *)
let simple_config =
  {
    DynamicGc.min_space_overhead = 20;
    max_space_overhead = 40;
    heap_start_worrying_mb = 1_024;
    heap_really_worry_mb = 4_096;
  }
24+
25+
(* Config whose two thresholds coincide at 1_024 MB, exercising the
 * sharp-cutover path instead of interpolation. *)
let same_heap_size_cutoff =
  let open DynamicGc in
  {
    min_space_overhead = 20;
    max_space_overhead = 40;
    heap_start_worrying_mb = 1_024;
    heap_really_worry_mb = 1_024;
  }
33+
34+
(* All test cases, in execution order: first the ones using distinct
 * thresholds, then the ones using a single shared cutoff. *)
let cases : case list =
  let with_ramp =
    [
      ("under worrying limit", simple_config, 512, 40);
      ("at worrying limit", simple_config, 1_024, 40);
      ("halfway", simple_config, 2_560, 30);
      ("at really worrying limit", simple_config, 4_096, 20);
      ("above really worrying limit", simple_config, 8_192, 20);
    ]
  in
  let with_single_cutoff =
    [
      ("below single cutoff", same_heap_size_cutoff, 1_023, 40);
      ("at single cutoff", same_heap_size_cutoff, 1_024, 40);
      ("above single cutoff", same_heap_size_cutoff, 1_025, 20);
    ]
  in
  with_ramp @ with_single_cutoff
45+
46+
(* Runs one case: computes the space overhead for the case's config and heap
 * size, then compares it with the expected value. *)
let check_case (name, config, heap_size, expected_space_overhead) =
  let actual =
    DynamicGc.ForTestingDoNotUse.space_overhead_of_heap_size config heap_size
  in
  check expected_space_overhead actual name
51+
52+
(* Entry point: run every case; the first mismatch aborts via [failwith]. *)
let () = cases |> List.iter check_case

0 commit comments

Comments
 (0)