@@ -27,6 +27,8 @@ uv_mutex_t gc_threads_lock;
 uv_cond_t gc_threads_cond;
 // To indicate whether concurrent sweeping should run
 uv_sem_t gc_sweep_assists_needed;
+// Mutex used to coordinate entry of GC threads in the mark loop
+uv_mutex_t gc_queue_observer_lock;
 
 // Linked list of callback functions
@@ -2706,8 +2708,10 @@ void gc_mark_and_steal(jl_ptls_t ptls)
     jl_gc_markqueue_t *mq = &ptls->mark_queue;
     jl_gc_markqueue_t *mq_master = NULL;
     int master_tid = jl_atomic_load(&gc_master_tid);
-    if (master_tid != -1)
-        mq_master = &gc_all_tls_states[master_tid]->mark_queue;
+    if (master_tid == -1) {
+        return;
+    }
+    mq_master = &gc_all_tls_states[master_tid]->mark_queue;
     void *new_obj;
     jl_gc_chunk_t c;
     pop : {
@@ -2782,28 +2786,108 @@ void gc_mark_and_steal(jl_ptls_t ptls)
     }
 }
 
+size_t gc_count_work_in_queue(jl_ptls_t ptls) JL_NOTSAFEPOINT
+{
+    // assume each chunk is worth 256 units of work and each pointer
+    // is worth 1 unit of work
+    size_t work = 256 * (jl_atomic_load_relaxed(&ptls->mark_queue.chunk_queue.bottom) -
+                         jl_atomic_load_relaxed(&ptls->mark_queue.chunk_queue.top));
+    work += (jl_atomic_load_relaxed(&ptls->mark_queue.ptr_queue.bottom) -
+             jl_atomic_load_relaxed(&ptls->mark_queue.ptr_queue.top));
+    return work;
+}
+
+/**
+ * Correctness argument for the mark-loop termination protocol.
+ *
+ * Safety properties:
+ * - No work items shall be in any thread's queues when `gc_mark_loop_barrier` observes
+ * that `gc_n_threads_marking` is zero.
+ *
+ * - No work item shall be stolen from the master thread (i.e. mutator thread which started
+ * GC and which helped the `jl_n_markthreads` - 1 threads to mark) after
+ * `gc_mark_loop_barrier` observes that `gc_n_threads_marking` is zero. This property is
+ * necessary because we call `gc_mark_loop_serial` after marking the finalizer list in
+ * `_jl_gc_collect`, and want to ensure that we have the serial mark-loop semantics there,
+ * and that no work is stolen from us at that point.
+ *
+ * Proof:
+ * - Suppose the master thread observes that `gc_n_threads_marking` is zero in
+ * `gc_mark_loop_barrier` and there is a work item left in one thread's queue at that point.
+ * Since threads try to steal from all threads' queues, this implies that all threads must
+ * have tried to steal from the queue which still has a work item left, but failed to do so,
+ * which violates the semantics of Chase-Lev's work-stealing queue.
+ *
+ * - Let E1 be the event "master thread writes -1 to gc_master_tid" and E2 be the event
+ * "master thread observes that `gc_n_threads_marking` is zero". Since we're using
+ * sequentially consistent atomics, E1 => E2. Now suppose one thread which is spinning in
+ * `gc_should_mark` tries to enter the mark-loop after E2. In order to do so, it must
+ * increment `gc_n_threads_marking` to 1 in an event E3, and then read `gc_master_tid` in an
+ * event E4. Since we're using sequentially consistent atomics, E3 => E4. Since we observed
+ * `gc_n_threads_marking` as zero in E2, then E2 => E3, and we conclude E1 => E4, so that
+ * the thread which is spinning in `gc_should_mark` must observe that `gc_master_tid` is -1
+ * and therefore won't enter the mark-loop.
+ */
+
+int gc_should_mark(jl_ptls_t ptls)
+{
+    int should_mark = 0;
+    int n_threads_marking = jl_atomic_load(&gc_n_threads_marking);
+    // fast path
+    if (n_threads_marking == 0) {
+        return 0;
+    }
+    uv_mutex_lock(&gc_queue_observer_lock);
+    while (1) {
+        int tid = jl_atomic_load(&gc_master_tid);
+        // fast path
+        if (tid == -1) {
+            break;
+        }
+        n_threads_marking = jl_atomic_load(&gc_n_threads_marking);
+        // fast path
+        if (n_threads_marking == 0) {
+            break;
+        }
+        size_t work = gc_count_work_in_queue(gc_all_tls_states[tid]);
+        for (tid = gc_first_tid; tid < gc_first_tid + jl_n_markthreads; tid++) {
+            work += gc_count_work_in_queue(gc_all_tls_states[tid]);
+        }
+        // if there is a lot of work left, enter the mark loop
+        if (work >= 16 * n_threads_marking) {
+            jl_atomic_fetch_add(&gc_n_threads_marking, 1);
+            should_mark = 1;
+            break;
+        }
+        jl_cpu_pause();
+    }
+    uv_mutex_unlock(&gc_queue_observer_lock);
+    return should_mark;
+}
+
+void gc_wake_all_for_marking(jl_ptls_t ptls)
+{
+    jl_atomic_store(&gc_master_tid, ptls->tid);
+    uv_mutex_lock(&gc_threads_lock);
+    jl_atomic_fetch_add(&gc_n_threads_marking, 1);
+    uv_cond_broadcast(&gc_threads_cond);
+    uv_mutex_unlock(&gc_threads_lock);
+}
+
 void gc_mark_loop_parallel(jl_ptls_t ptls, int master)
 {
-    int backoff = GC_BACKOFF_MIN;
     if (master) {
-        jl_atomic_store(&gc_master_tid, ptls->tid);
-        // Wake threads up and try to do some work
-        uv_mutex_lock(&gc_threads_lock);
-        jl_atomic_fetch_add(&gc_n_threads_marking, 1);
-        uv_cond_broadcast(&gc_threads_cond);
-        uv_mutex_unlock(&gc_threads_lock);
+        gc_wake_all_for_marking(ptls);
         gc_mark_and_steal(ptls);
         jl_atomic_fetch_add(&gc_n_threads_marking, -1);
     }
-    while (jl_atomic_load(&gc_n_threads_marking) > 0) {
-        // Try to become a thief while other threads are marking
-        jl_atomic_fetch_add(&gc_n_threads_marking, 1);
-        if (jl_atomic_load(&gc_master_tid) != -1) {
-            gc_mark_and_steal(ptls);
+    while (1) {
+        int should_mark = gc_should_mark(ptls);
+        if (!should_mark) {
+            break;
         }
+        gc_mark_and_steal(ptls);
         jl_atomic_fetch_add(&gc_n_threads_marking, -1);
-        // Failed to steal
-        gc_backoff(&backoff);
     }
 }
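Aside (not part of the diff): the correctness argument above refers to `gc_mark_loop_barrier`, which this patch does not show. Below is a minimal sketch, consistent with events E1 and E2 in that comment, of what the master thread's barrier is assumed to do; the actual implementation lives elsewhere in gc.c and may differ in detail.

// Sketch only: the master-side barrier as assumed by the correctness argument.
// E1 is the store of -1 to gc_master_tid; E2 is the load that observes
// gc_n_threads_marking == 0.
void gc_mark_loop_barrier_sketch(void)
{
    // E1: after this store, gc_should_mark() will refuse entry to the mark loop
    jl_atomic_store(&gc_master_tid, -1);
    // spin until every marker has drained its queue and decremented the counter
    while (jl_atomic_load(&gc_n_threads_marking) != 0) {
        jl_cpu_pause();
    }
    // E2 has now occurred: by the safety properties above, all mark queues are empty
}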
@@ -3543,6 +3627,7 @@ void jl_gc_init(void)
     uv_mutex_init(&gc_threads_lock);
     uv_cond_init(&gc_threads_cond);
     uv_sem_init(&gc_sweep_assists_needed, 0);
+    uv_mutex_init(&gc_queue_observer_lock);
 
     jl_gc_init_page();
     jl_gc_debug_init();
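Aside (not part of the diff): the worker side of this protocol, i.e. the dedicated parallel mark threads that sleep on `gc_threads_cond` and call `gc_mark_loop_parallel` with `master == 0`, lives in the threading code rather than in gc.c. A rough sketch of that loop, under the assumption that a worker simply waits for `gc_wake_all_for_marking` to broadcast and then runs the non-master entry path:

// Rough sketch only: a dedicated parallel mark thread driving the entry protocol.
// The real worker loop also handles concurrent sweeping and shutdown.
void gc_mark_worker_sketch(jl_ptls_t ptls)
{
    while (1) {
        uv_mutex_lock(&gc_threads_lock);
        while (jl_atomic_load(&gc_n_threads_marking) == 0) {
            // sleep until gc_wake_all_for_marking() broadcasts gc_threads_cond
            uv_cond_wait(&gc_threads_cond, &gc_threads_lock);
        }
        uv_mutex_unlock(&gc_threads_lock);
        // non-master entry: gc_should_mark() decides, based on the amount of
        // queued work relative to the number of active markers, whether this
        // thread should join the mark loop at all
        gc_mark_loop_parallel(ptls, /* master */ 0);
    }
}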