@@ -799,7 +799,7 @@ STATIC_INLINE void gc_queue_big_marked(jl_ptls_t ptls, bigval_t *hdr,
 FORCE_INLINE int gc_try_setmark_tag(jl_taggedvalue_t *o, uint8_t mark_mode) JL_NOTSAFEPOINT
 {
     assert(gc_marked(mark_mode));
-    uintptr_t tag = jl_atomic_load_relaxed((_Atomic(uintptr_t)*)&o->header);
+    uintptr_t tag = o->header;
     if (gc_marked(tag))
         return 0;
     if (mark_reset_age) {
@@ -813,9 +813,13 @@ FORCE_INLINE int gc_try_setmark_tag(jl_taggedvalue_t *o, uint8_t mark_mode) JL_N
         tag = tag | mark_mode;
         assert((tag & 0x3) == mark_mode);
     }
-    jl_atomic_store_relaxed((_Atomic(uintptr_t)*)&o->header, tag); //xchg here was slower than
-    verify_val(jl_valueof(o)); //potentially redoing work because of a stale tag.
-    return 1;
+    // XXX: note that marking not only sets the GC bits but also updates the
+    // page metadata for pool allocated objects.
+    // The second step is **not** idempotent, so we need an atomic exchange here
+    // (instead of a pair of load & store) to avoid marking an object twice.
+    tag = jl_atomic_exchange_relaxed((_Atomic(uintptr_t)*)&o->header, tag);
+    verify_val(jl_valueof(o));
+    return !gc_marked(tag);
 }
 
 // This function should be called exactly once during marking for each big
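Why the exchange matters: with the old load/store pair, two mark threads could both observe an unmarked tag, both return 1, and the caller would then run the non-idempotent page-metadata update twice for the same object. A minimal C11 sketch of the two variants (illustrative names, not Julia's internals):

```c
#include <stdatomic.h>
#include <stdio.h>

#define MARK_BIT 0x1u

static _Atomic(unsigned) header; /* stands in for o->header */

/* Old scheme: a relaxed load/store pair. Two racing threads can both load
 * an unmarked tag, both store the marked tag, and both return 1, so the
 * non-idempotent page-metadata update runs twice for one object. */
static int try_setmark_load_store(void)
{
    unsigned tag = atomic_load_explicit(&header, memory_order_relaxed);
    if (tag & MARK_BIT)
        return 0;
    atomic_store_explicit(&header, tag | MARK_BIT, memory_order_relaxed);
    return 1;
}

/* New scheme: the exchange returns the previous tag atomically, so exactly
 * one thread observes the unmarked value and reports success. */
static int try_setmark_xchg(void)
{
    unsigned tag = atomic_load_explicit(&header, memory_order_relaxed);
    if (tag & MARK_BIT)
        return 0;
    tag = atomic_exchange_explicit(&header, tag | MARK_BIT,
                                   memory_order_relaxed);
    return !(tag & MARK_BIT);
}

int main(void)
{
    printf("xchg, first caller:  %d\n", try_setmark_xchg()); /* 1 */
    printf("xchg, second caller: %d\n", try_setmark_xchg()); /* 0 */

    /* The load/store variant behaves identically when run sequentially;
     * the double-win only appears when two threads interleave between
     * its load and its store. */
    atomic_store_explicit(&header, 0, memory_order_relaxed);
    printf("load/store:          %d\n", try_setmark_load_store()); /* 1 */
    return 0;
}
```

Note that the exchange does not stop both threads from writing the mark bit; it only guarantees that exactly one of them sees the previous, unmarked tag and reports success, which is all the caller needs.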
@@ -854,7 +858,7 @@ STATIC_INLINE void gc_setmark_pool_(jl_ptls_t ptls, jl_taggedvalue_t *o,
     if (mark_mode == GC_OLD_MARKED) {
         ptls->gc_cache.perm_scanned_bytes += page->osize;
         static_assert(sizeof(_Atomic(uint16_t)) == sizeof(page->nold), "");
-        page->nold++;
+        jl_atomic_fetch_add_relaxed((_Atomic(uint16_t)*)&page->nold, 1);
     }
     else {
         ptls->gc_cache.scanned_bytes += page->osize;
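The survivor counter needs the same treatment: once several mark threads can win `gc_try_setmark_tag` for different objects on the same page, a plain `page->nold++` is a load/add/store data race that can lose increments. A small C11 sketch (illustrative names, not the Julia runtime) of the relaxed fetch-add; build with `cc demo.c -pthread`:

```c
#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* `nold` stands in for the per-page survivor counter (uint16_t, as in the
 * page metadata). A plain `nold++` compiles to load/add/store and can lose
 * increments under contention; a relaxed fetch-add cannot. */
static _Atomic(uint16_t) nold;

static void *marker(void *arg)
{
    (void)arg;
    for (int i = 0; i < 1000; i++)
        atomic_fetch_add_explicit(&nold, 1, memory_order_relaxed);
    return NULL;
}

int main(void)
{
    pthread_t t1, t2;
    pthread_create(&t1, NULL, marker, NULL);
    pthread_create(&t2, NULL, marker, NULL);
    pthread_join(t1, NULL);
    pthread_join(t2, NULL);
    /* always prints 2000; with a non-atomic ++ it could print less */
    printf("%u\n", (unsigned)atomic_load_explicit(&nold, memory_order_relaxed));
    return 0;
}
```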
@@ -1377,27 +1381,20 @@ static jl_taggedvalue_t **gc_sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t **allo
         nfree = (GC_PAGE_SZ - GC_PAGE_OFFSET) / osize;
         goto done;
     }
-    // note that `pg->nold` may not be accurate with multithreaded marking since
-    // two threads may race when trying to set the mark bit in `gc_try_setmark_tag`.
-    // We're basically losing a bit of precision in the sweep phase at the cost of
-    // making the mark phase considerably cheaper.
-    // See issue #50419
-    if (jl_n_markthreads == 0) {
-        // For quick sweep, we might be able to skip the page if the page doesn't
-        // have any young live cell before marking.
-        if (!sweep_full && !pg->has_young) {
-            assert(!prev_sweep_full || pg->prev_nold >= pg->nold);
-            if (!prev_sweep_full || pg->prev_nold == pg->nold) {
-                // the position of the freelist begin/end in this page
-                // is stored in its metadata
-                if (pg->fl_begin_offset != (uint16_t)-1) {
-                    *pfl = page_pfl_beg(pg);
-                    pfl = (jl_taggedvalue_t**)page_pfl_end(pg);
-                }
-                freedall = 0;
-                nfree = pg->nfree;
-                goto done;
+    // For quick sweep, we might be able to skip the page if the page doesn't
+    // have any young live cell before marking.
+    if (!sweep_full && !pg->has_young) {
+        assert(!prev_sweep_full || pg->prev_nold >= pg->nold);
+        if (!prev_sweep_full || pg->prev_nold == pg->nold) {
+            // the position of the freelist begin/end in this page
+            // is stored in its metadata
+            if (pg->fl_begin_offset != (uint16_t)-1) {
+                *pfl = page_pfl_beg(pg);
+                pfl = (jl_taggedvalue_t**)page_pfl_end(pg);
             }
+            freedall = 0;
+            nfree = pg->nfree;
+            goto done;
         }
     }
 
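With the exchange in place, `pg->nold` is exact again even under parallel marking, so the quick-sweep fast path no longer needs the `jl_n_markthreads == 0` guard: a page with no young cells and an unchanged survivor count is skipped by splicing its cached freelist segment straight into the list being built, through the tail link `pfl`. A minimal sketch of that splice (illustrative types and names, not Julia's):

```c
#include <stdio.h>

typedef struct cell { struct cell *next; } cell_t;

/* Splice one page's cached freelist segment [beg, end] into the global
 * freelist: `pfl` is the tail link of the list built so far. */
static cell_t **splice_page(cell_t **pfl, cell_t *beg, cell_t *end)
{
    *pfl = beg;        /* old tail link now points at the segment */
    return &end->next; /* the segment's last link becomes the new tail */
}

int main(void)
{
    /* two "pages" with cached segments a->b and c->d */
    cell_t a, b, c, d;
    a.next = &b; b.next = NULL;
    c.next = &d; d.next = NULL;

    cell_t *freelist = NULL;
    cell_t **pfl = &freelist;
    pfl = splice_page(pfl, &a, &b); /* skip page 1 without walking it */
    pfl = splice_page(pfl, &c, &d); /* skip page 2 */
    *pfl = NULL;                    /* terminate once all pages are done */

    for (cell_t *p = freelist; p != NULL; p = p->next)
        printf("free cell at %p\n", (void *)p);
    return 0;
}
```

Carrying `pfl` as a pointer-to-pointer is what makes the skip cheap: appending a whole page's segment costs two pointer writes, independent of how many cells the page holds.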