@@ -2960,9 +2960,7 @@ void gc_mark_and_steal(jl_ptls_t ptls)
     jl_gc_markqueue_t *mq = &ptls->mark_queue;
     jl_gc_markqueue_t *mq_master = NULL;
     int master_tid = jl_atomic_load(&gc_master_tid);
-    if (master_tid == -1) {
-        return;
-    }
+    assert(master_tid != -1);
     mq_master = &gc_all_tls_states[master_tid]->mark_queue;
     void *new_obj;
     jl_gc_chunk_t c;
@@ -3053,61 +3051,49 @@ size_t gc_count_work_in_queue(jl_ptls_t ptls) JL_NOTSAFEPOINT
  * Correctness argument for the mark-loop termination protocol.
  *
  * Safety properties:
- * - No work items shall be in any thread's queues when `gc_mark_loop_barrier` observes
+ * - No work items shall be in any thread's queues when `gc_should_mark` observes
  * that `gc_n_threads_marking` is zero.
  *
  * - No work item shall be stolen from the master thread (i.e. mutator thread which started
  * GC and which helped the `jl_n_markthreads` - 1 threads to mark) after
- * `gc_mark_loop_barrier` observes that `gc_n_threads_marking` is zero. This property is
+ * `gc_should_mark` observes that `gc_n_threads_marking` is zero. This property is
  * necessary because we call `gc_mark_loop_serial` after marking the finalizer list in
  * `_jl_gc_collect`, and want to ensure that we have the serial mark-loop semantics there,
  * and that no work is stolen from us at that point.
  *
  * Proof:
- * - Suppose the master thread observes that `gc_n_threads_marking` is zero in
- * `gc_mark_loop_barrier` and there is a work item left in one thread's queue at that point.
- * Since threads try to steal from all threads' queues, this implies that all threads must
- * have tried to steal from the queue which still has a work item left, but failed to do so,
- * which violates the semantics of Chase-Lev's work-stealing queue.
- *
- * - Let E1 be the event "master thread writes -1 to gc_master_tid" and E2 be the event
- * "master thread observes that `gc_n_threads_marking` is zero". Since we're using
- * sequentially consistent atomics, E1 => E2. Now suppose one thread which is spinning in
- * `gc_should_mark` tries to enter the mark-loop after E2. In order to do so, it must
- * increment `gc_n_threads_marking` to 1 in an event E3, and then read `gc_master_tid` in an
- * event E4. Since we're using sequentially consistent atomics, E3 => E4. Since we observed
- * `gc_n_threads_marking` as zero in E2, then E2 => E3, and we conclude E1 => E4, so that
- * the thread which is spinning in `gc_should_mark` must observe that `gc_master_tid` is -1
- * and therefore won't enter the mark-loop.
+ * - If a thread observes that `gc_n_threads_marking` is zero inside `gc_should_mark`, then
+ * no thread has work in its queue: a thread may only exit `gc_mark_and_steal` once its own
+ * queue is empty, and that fact is synchronized to any thread in `gc_should_mark` by the
+ * seq-cst fetch_add. Furthermore, `gc_queue_observer_lock` guarantees that once
+ * `gc_n_threads_marking` reaches zero, no thread will increment it again, because
+ * incrementing is only legal from inside the lock. Therefore, no thread will reenter the
+ * mark-loop after `gc_n_threads_marking` reaches zero.
  */

-int gc_should_mark(jl_ptls_t ptls)
+int gc_should_mark(void)
 {
     int should_mark = 0;
-    int n_threads_marking = jl_atomic_load(&gc_n_threads_marking);
-    // fast path
-    if (n_threads_marking == 0) {
-        return 0;
-    }
     uv_mutex_lock(&gc_queue_observer_lock);
     while (1) {
-        int tid = jl_atomic_load(&gc_master_tid);
-        // fast path
-        if (tid == -1) {
-            break;
-        }
-        n_threads_marking = jl_atomic_load(&gc_n_threads_marking);
-        // fast path
+        int n_threads_marking = jl_atomic_load(&gc_n_threads_marking);
         if (n_threads_marking == 0) {
             break;
         }
+        int tid = jl_atomic_load_relaxed(&gc_master_tid);
+        assert(tid != -1);
         size_t work = gc_count_work_in_queue(gc_all_tls_states[tid]);
         for (tid = gc_first_tid; tid < gc_first_tid + jl_n_markthreads; tid++) {
-            work += gc_count_work_in_queue(gc_all_tls_states[tid]);
+            jl_ptls_t ptls2 = gc_all_tls_states[tid];
+            if (ptls2 == NULL) {
+                continue;
+            }
+            work += gc_count_work_in_queue(ptls2);
         }
         // if there is a lot of work left, enter the mark loop
         if (work >= 16 * n_threads_marking) {
-            jl_atomic_fetch_add(&gc_n_threads_marking, 1);
+            jl_atomic_fetch_add(&gc_n_threads_marking, 1); // A possibility would be to allow a thread that
+                                                           // found lots of work to increment this
             should_mark = 1;
             break;
         }
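
To make the argument above concrete, here is a minimal standalone model of the termination protocol, using C11 atomics and pthreads in place of the runtime's `jl_atomic_*` and libuv wrappers. `work_left`, `worker`, and the driver in `main` are hypothetical stand-ins for the mark queues, not Julia's actual code; the point the sketch demonstrates is that workers decrement the counter with a seq-cst RMW only once they run out of work, and observers may increment it only while holding the lock, so a zero observed under the lock is final.

#include <assert.h>
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int gc_n_threads_marking = 0;
static atomic_int work_left = 1000;               // stand-in for the mark queues
static pthread_mutex_t gc_queue_observer_lock = PTHREAD_MUTEX_INITIALIZER;

// Worker: analogous to a thread inside `gc_mark_and_steal`. It may only drop
// out (and decrement the counter) once it finds no work; the seq-cst RMW on
// `gc_n_threads_marking` publishes that fact to observers.
static void *worker(void *arg)
{
    (void)arg;
    while (atomic_fetch_sub(&work_left, 1) > 0)
        ;                                          // "mark" one object
    atomic_fetch_add(&gc_n_threads_marking, -1);   // seq-cst by default
    return NULL;
}

// Observer: analogous to `gc_should_mark`. Incrementing the counter is only
// legal while holding the lock, so once zero is observed here, no thread can
// resurrect the mark phase.
static int should_mark(void)
{
    int should = 0;
    pthread_mutex_lock(&gc_queue_observer_lock);
    while (1) {
        int n = atomic_load(&gc_n_threads_marking);
        if (n == 0)
            break;                                 // phase is over for good
        if (atomic_load(&work_left) >= 16 * n) {   // "a lot of work left"
            atomic_fetch_add(&gc_n_threads_marking, 1);
            should = 1;
            break;
        }
    }
    pthread_mutex_unlock(&gc_queue_observer_lock);
    return should;
}

int main(void)
{
    enum { NWORKERS = 4 };
    pthread_t tids[NWORKERS];
    atomic_store(&gc_n_threads_marking, NWORKERS); // count workers in up front
    for (int i = 0; i < NWORKERS; i++)
        pthread_create(&tids[i], NULL, worker, NULL);
    while (should_mark()) {                        // help until the phase ends
        while (atomic_fetch_sub(&work_left, 1) > 0)
            ;
        atomic_fetch_add(&gc_n_threads_marking, -1);
    }
    for (int i = 0; i < NWORKERS; i++)
        pthread_join(tids[i], NULL);
    assert(atomic_load(&gc_n_threads_marking) == 0);
    puts("mark phase terminated");
    return 0;
}
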
@@ -3119,22 +3105,22 @@ int gc_should_mark(jl_ptls_t ptls)

 void gc_wake_all_for_marking(jl_ptls_t ptls)
 {
-    jl_atomic_store(&gc_master_tid, ptls->tid);
     uv_mutex_lock(&gc_threads_lock);
-    jl_atomic_fetch_add(&gc_n_threads_marking, 1);
     uv_cond_broadcast(&gc_threads_cond);
     uv_mutex_unlock(&gc_threads_lock);
 }

 void gc_mark_loop_parallel(jl_ptls_t ptls, int master)
 {
     if (master) {
+        jl_atomic_store(&gc_master_tid, ptls->tid);
+        jl_atomic_fetch_add(&gc_n_threads_marking, 1);
         gc_wake_all_for_marking(ptls);
         gc_mark_and_steal(ptls);
         jl_atomic_fetch_add(&gc_n_threads_marking, -1);
     }
     while (1) {
-        int should_mark = gc_should_mark(ptls);
+        int should_mark = gc_should_mark();
         if (!should_mark) {
             break;
         }
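
This hunk moves the `gc_master_tid` store and the master's `fetch_add` out of `gc_wake_all_for_marking` and ahead of the wakeup broadcast. Below is a minimal sketch of why that ordering lets `gc_mark_and_steal` assert `master_tid != -1`, again assuming C11 atomics and pthreads with hypothetical stand-in names (`mark_thread`, `woken`): the worker is released only after the master tid is published, so it can never observe the sentinel value.

#include <assert.h>
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int gc_master_tid = -1;
static atomic_int gc_n_threads_marking = 0;
static pthread_mutex_t gc_threads_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t gc_threads_cond = PTHREAD_COND_INITIALIZER;
static atomic_int woken = 0;

static void *mark_thread(void *arg)
{
    (void)arg;
    pthread_mutex_lock(&gc_threads_lock);
    while (!atomic_load(&woken))
        pthread_cond_wait(&gc_threads_cond, &gc_threads_lock);
    pthread_mutex_unlock(&gc_threads_lock);
    // Mirrors the new assert in gc_mark_and_steal: the master tid was
    // published before the broadcast, so it cannot still be -1 here.
    assert(atomic_load(&gc_master_tid) != -1);
    return NULL;
}

int main(void)
{
    pthread_t t;
    pthread_create(&t, NULL, mark_thread, NULL);
    // Master side, in the order the patch establishes:
    atomic_store(&gc_master_tid, 0);             // 1. publish the master tid
    atomic_fetch_add(&gc_n_threads_marking, 1);  // 2. count the master in
    pthread_mutex_lock(&gc_threads_lock);        // 3. only now wake workers
    atomic_store(&woken, 1);
    pthread_cond_broadcast(&gc_threads_cond);
    pthread_mutex_unlock(&gc_threads_lock);
    pthread_join(t, NULL);
    puts("worker observed a valid master tid");
    return 0;
}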