Skip to content

Commit aefe3c9

Browse files
authored
gc scheduler synchronization fixes to 1.10 (JuliaLang#53661) (#133)
Cherry-pick the parts of JuliaLang#53355 which are relevant to the 1.10 GC.
1 parent 2c22634 commit aefe3c9

File tree

1 file changed

+24
-38
lines changed

1 file changed

+24
-38
lines changed

src/gc.c

Lines changed: 24 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -2960,9 +2960,7 @@ void gc_mark_and_steal(jl_ptls_t ptls)
29602960
jl_gc_markqueue_t *mq = &ptls->mark_queue;
29612961
jl_gc_markqueue_t *mq_master = NULL;
29622962
int master_tid = jl_atomic_load(&gc_master_tid);
2963-
if (master_tid == -1) {
2964-
return;
2965-
}
2963+
assert(master_tid != -1);
29662964
mq_master = &gc_all_tls_states[master_tid]->mark_queue;
29672965
void *new_obj;
29682966
jl_gc_chunk_t c;
@@ -3053,61 +3051,49 @@ size_t gc_count_work_in_queue(jl_ptls_t ptls) JL_NOTSAFEPOINT
30533051
* Correctness argument for the mark-loop termination protocol.
30543052
*
30553053
* Safety properties:
3056-
* - No work items shall be in any thread's queues when `gc_mark_loop_barrier` observes
3054+
* - No work items shall be in any thread's queues when `gc_should_mark` observes
30573055
* that `gc_n_threads_marking` is zero.
30583056
*
30593057
* - No work item shall be stolen from the master thread (i.e. mutator thread which started
30603058
* GC and which helped the `jl_n_markthreads` - 1 threads to mark) after
3061-
* `gc_mark_loop_barrier` observes that `gc_n_threads_marking` is zero. This property is
3059+
* `gc_should_mark` observes that `gc_n_threads_marking` is zero. This property is
30623060
* necessary because we call `gc_mark_loop_serial` after marking the finalizer list in
30633061
* `_jl_gc_collect`, and want to ensure that we have the serial mark-loop semantics there,
30643062
* and that no work is stolen from us at that point.
30653063
*
30663064
* Proof:
3067-
* - Suppose the master thread observes that `gc_n_threads_marking` is zero in
3068-
* `gc_mark_loop_barrier` and there is a work item left in one thread's queue at that point.
3069-
* Since threads try to steal from all threads' queues, this implies that all threads must
3070-
* have tried to steal from the queue which still has a work item left, but failed to do so,
3071-
* which violates the semantics of Chase-Lev's work-stealing queue.
3072-
*
3073-
* - Let E1 be the event "master thread writes -1 to gc_master_tid" and E2 be the event
3074-
* "master thread observes that `gc_n_threads_marking` is zero". Since we're using
3075-
* sequentially consistent atomics, E1 => E2. Now suppose one thread which is spinning in
3076-
* `gc_should_mark` tries to enter the mark-loop after E2. In order to do so, it must
3077-
* increment `gc_n_threads_marking` to 1 in an event E3, and then read `gc_master_tid` in an
3078-
* event E4. Since we're using sequentially consistent atomics, E3 => E4. Since we observed
3079-
* `gc_n_threads_marking` as zero in E2, then E2 => E3, and we conclude E1 => E4, so that
3080-
* the thread which is spinning in `gc_should_mark` must observe that `gc_master_tid` is -1
3081-
* and therefore won't enter the mark-loop.
3065+
* - If a thread observes that `gc_n_threads_marking` is zero inside `gc_should_mark`, that
3066+
* means that no thread has work on their queue, this is guaranteed because a thread may only exit
3067+
* `gc_mark_and_steal` when its own queue is empty, this information is synchronized by the
3068+
* seq-cst fetch_add to a thread that is in `gc_should_mark`. `gc_queue_observer_lock`
3069+
* guarantees that once `gc_n_threads_marking` reaches zero, no thread will increment it again,
3070+
* because incrementing is only legal from inside the lock. Therefore, no thread will reenter
3071+
* the mark-loop after `gc_n_threads_marking` reaches zero.
30823072
*/
30833073

3084-
int gc_should_mark(jl_ptls_t ptls)
3074+
int gc_should_mark(void)
30853075
{
30863076
int should_mark = 0;
3087-
int n_threads_marking = jl_atomic_load(&gc_n_threads_marking);
3088-
// fast path
3089-
if (n_threads_marking == 0) {
3090-
return 0;
3091-
}
30923077
uv_mutex_lock(&gc_queue_observer_lock);
30933078
while (1) {
3094-
int tid = jl_atomic_load(&gc_master_tid);
3095-
// fast path
3096-
if (tid == -1) {
3097-
break;
3098-
}
3099-
n_threads_marking = jl_atomic_load(&gc_n_threads_marking);
3100-
// fast path
3079+
int n_threads_marking = jl_atomic_load(&gc_n_threads_marking);
31013080
if (n_threads_marking == 0) {
31023081
break;
31033082
}
3083+
int tid = jl_atomic_load_relaxed(&gc_master_tid);
3084+
assert(tid != -1);
31043085
size_t work = gc_count_work_in_queue(gc_all_tls_states[tid]);
31053086
for (tid = gc_first_tid; tid < gc_first_tid + jl_n_markthreads; tid++) {
3106-
work += gc_count_work_in_queue(gc_all_tls_states[tid]);
3087+
jl_ptls_t ptls2 = gc_all_tls_states[tid];
3088+
if (ptls2 == NULL) {
3089+
continue;
3090+
}
3091+
work += gc_count_work_in_queue(ptls2);
31073092
}
31083093
// if there is a lot of work left, enter the mark loop
31093094
if (work >= 16 * n_threads_marking) {
3110-
jl_atomic_fetch_add(&gc_n_threads_marking, 1);
3095+
jl_atomic_fetch_add(&gc_n_threads_marking, 1); // A possibility would be to allow a thread that found lots
3096+
// of work to increment this
31113097
should_mark = 1;
31123098
break;
31133099
}
@@ -3119,22 +3105,22 @@ int gc_should_mark(jl_ptls_t ptls)
31193105

31203106
void gc_wake_all_for_marking(jl_ptls_t ptls)
31213107
{
3122-
jl_atomic_store(&gc_master_tid, ptls->tid);
31233108
uv_mutex_lock(&gc_threads_lock);
3124-
jl_atomic_fetch_add(&gc_n_threads_marking, 1);
31253109
uv_cond_broadcast(&gc_threads_cond);
31263110
uv_mutex_unlock(&gc_threads_lock);
31273111
}
31283112

31293113
void gc_mark_loop_parallel(jl_ptls_t ptls, int master)
31303114
{
31313115
if (master) {
3116+
jl_atomic_store(&gc_master_tid, ptls->tid);
3117+
jl_atomic_fetch_add(&gc_n_threads_marking, 1);
31323118
gc_wake_all_for_marking(ptls);
31333119
gc_mark_and_steal(ptls);
31343120
jl_atomic_fetch_add(&gc_n_threads_marking, -1);
31353121
}
31363122
while (1) {
3137-
int should_mark = gc_should_mark(ptls);
3123+
int should_mark = gc_should_mark();
31383124
if (!should_mark) {
31393125
break;
31403126
}

0 commit comments

Comments
 (0)