diff --git a/src/gc-debug.c b/src/gc-debug.c index 22a43672a347a..54b6d5f747bc4 100644 --- a/src/gc-debug.c +++ b/src/gc-debug.c @@ -112,7 +112,7 @@ static void gc_clear_mark_outer(int bits) { for (int i = 0; i < gc_n_threads; i++) { jl_ptls_t ptls2 = gc_all_tls_states[i]; - jl_gc_pagemeta_t *pg = ptls2->page_metadata_allocd; + jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->page_metadata_allocd.bottom); while (pg != NULL) { gc_clear_mark_page(pg, bits); pg = pg->next; @@ -1157,7 +1157,7 @@ static void gc_count_pool_pagetable(void) { for (int i = 0; i < gc_n_threads; i++) { jl_ptls_t ptls2 = gc_all_tls_states[i]; - jl_gc_pagemeta_t *pg = ptls2->page_metadata_allocd; + jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->page_metadata_allocd.bottom); while (pg != NULL) { if (gc_alloc_map_is_set(pg->data)) { gc_count_pool_page(pg); diff --git a/src/gc-pages.c b/src/gc-pages.c index 13e992189f67e..d5ad87e180cb4 100644 --- a/src/gc-pages.c +++ b/src/gc-pages.c @@ -129,22 +129,22 @@ NOINLINE jl_gc_pagemeta_t *jl_gc_alloc_page(void) JL_NOTSAFEPOINT jl_gc_pagemeta_t *meta = NULL; // try to get page from `pool_clean` - meta = pop_lf_page_metadata_back(&global_page_pool_clean); + meta = pop_lf_back(&global_page_pool_clean); if (meta != NULL) { gc_alloc_map_set(meta->data, GC_PAGE_ALLOCATED); goto exit; } // try to get page from `pool_freed` - meta = pop_lf_page_metadata_back(&global_page_pool_freed); + meta = pop_lf_back(&global_page_pool_freed); if (meta != NULL) { gc_alloc_map_set(meta->data, GC_PAGE_ALLOCATED); goto exit; } uv_mutex_lock(&gc_perm_lock); - // another thread may have allocated a large block while we're waiting... - meta = pop_lf_page_metadata_back(&global_page_pool_clean); + // another thread may have allocated a large block while we were waiting... + meta = pop_lf_back(&global_page_pool_clean); if (meta != NULL) { uv_mutex_unlock(&gc_perm_lock); gc_alloc_map_set(meta->data, GC_PAGE_ALLOCATED); @@ -161,7 +161,7 @@ NOINLINE jl_gc_pagemeta_t *jl_gc_alloc_page(void) JL_NOTSAFEPOINT gc_alloc_map_set(pg->data, GC_PAGE_ALLOCATED); } else { - push_lf_page_metadata_back(&global_page_pool_clean, pg); + push_lf_back(&global_page_pool_clean, pg); } } uv_mutex_unlock(&gc_perm_lock); diff --git a/src/gc.c b/src/gc.c index a985394968ac1..bec9c45274c37 100644 --- a/src/gc.c +++ b/src/gc.c @@ -11,18 +11,20 @@ extern "C" { #endif +// Number of threads currently running the GC mark-loop +_Atomic(int) gc_n_threads_marking; +// Number of threads sweeping +_Atomic(int) gc_n_threads_sweeping; +// Temporary for the `ptls->page_metadata_allocd` used during parallel sweeping +_Atomic(jl_gc_page_stack_t *) gc_allocd_scratch; // `tid` of mutator thread that triggered GC _Atomic(int) gc_master_tid; // `tid` of first GC thread int gc_first_tid; - // Mutex/cond used to synchronize sleep/wakeup of GC threads uv_mutex_t gc_threads_lock; uv_cond_t gc_threads_cond; -// Number of threads currently running the GC mark-loop -_Atomic(int) gc_n_threads_marking; - // Linked list of callback functions typedef void (*jl_gc_cb_func_t)(void); @@ -844,6 +846,7 @@ static int mark_reset_age = 0; static int64_t scanned_bytes; // young bytes scanned while marking static int64_t perm_scanned_bytes; // old bytes scanned while marking int prev_sweep_full = 1; +int current_sweep_full = 0; #define inc_sat(v,s) v = (v) >= s ? s : (v)+1 @@ -1392,8 +1395,9 @@ STATIC_INLINE jl_taggedvalue_t *gc_reset_page(jl_ptls_t ptls2, const jl_gc_pool_ return beg; } -jl_gc_global_page_pool_t global_page_pool_clean; -jl_gc_global_page_pool_t global_page_pool_freed; +jl_gc_page_stack_t global_page_pool_lazily_freed; +jl_gc_page_stack_t global_page_pool_clean; +jl_gc_page_stack_t global_page_pool_freed; pagetable_t alloc_map; // Add a new page to the pool. Discards any pages in `p->newpages` before. @@ -1402,7 +1406,7 @@ static NOINLINE jl_taggedvalue_t *gc_add_page(jl_gc_pool_t *p) JL_NOTSAFEPOINT // Do not pass in `ptls` as argument. This slows down the fast path // in pool_alloc significantly jl_ptls_t ptls = jl_current_task->ptls; - jl_gc_pagemeta_t *pg = pop_page_metadata_back(&ptls->page_metadata_lazily_freed); + jl_gc_pagemeta_t *pg = pop_lf_back(&ptls->page_metadata_buffered); if (pg != NULL) { gc_alloc_map_set(pg->data, GC_PAGE_ALLOCATED); } @@ -1413,7 +1417,7 @@ static NOINLINE jl_taggedvalue_t *gc_add_page(jl_gc_pool_t *p) JL_NOTSAFEPOINT pg->ages = (uint8_t*)malloc_s(GC_PAGE_SZ / 8 / p->osize + 1); pg->thread_n = ptls->tid; set_page_metadata(pg); - push_page_metadata_back(&ptls->page_metadata_allocd, pg); + push_lf_back(&ptls->page_metadata_allocd, pg); jl_taggedvalue_t *fl = gc_reset_page(ptls, p, pg); p->newpages = fl; return fl; @@ -1511,11 +1515,11 @@ int jl_gc_classify_pools(size_t sz, int *osize) // sweep phase -int64_t lazy_freed_pages = 0; +int64_t buffered_pages = 0; // Returns pointer to terminal pointer of list rooted at *pfl. -static jl_taggedvalue_t **gc_sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t **allocd, - jl_gc_pagemeta_t **lazily_freed, jl_gc_pagemeta_t *pg, jl_taggedvalue_t **pfl, int sweep_full, int osize) JL_NOTSAFEPOINT +static void gc_sweep_page(jl_gc_pool_t *p, jl_gc_page_stack_t *allocd, jl_gc_page_stack_t *buffered, + jl_gc_pagemeta_t *pg, int osize) JL_NOTSAFEPOINT { char *data = pg->data; uint8_t *ages = pg->ages; @@ -1529,7 +1533,7 @@ static jl_taggedvalue_t **gc_sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t **allo size_t nfree; int re_use_page = 1; - int freed_lazily = 0; + int keep_as_local_buffer = 0; int freedall = 1; int pg_skpd = 1; if (!pg->has_marked) { @@ -1539,24 +1543,18 @@ static jl_taggedvalue_t **gc_sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t **allo // the eager one uses less memory. // FIXME - need to do accounting on a per-thread basis // on quick sweeps, keep a few pages empty but allocated for performance - if (!sweep_full && lazy_freed_pages <= default_collect_interval / GC_PAGE_SZ) { - lazy_freed_pages++; - freed_lazily = 1; + if (!current_sweep_full && buffered_pages <= default_collect_interval / GC_PAGE_SZ) { + buffered_pages++; + keep_as_local_buffer = 1; } nfree = (GC_PAGE_SZ - GC_PAGE_OFFSET) / osize; goto done; } // For quick sweep, we might be able to skip the page if the page doesn't // have any young live cell before marking. - if (!sweep_full && !pg->has_young) { + if (!current_sweep_full && !pg->has_young) { assert(!prev_sweep_full || pg->prev_nold >= pg->nold); if (!prev_sweep_full || pg->prev_nold == pg->nold) { - // the position of the freelist begin/end in this page - // is stored in its metadata - if (pg->fl_begin_offset != (uint16_t)-1) { - *pfl = page_pfl_beg(pg); - pfl = (jl_taggedvalue_t**)page_pfl_end(pg); - } freedall = 0; nfree = pg->nfree; goto done; @@ -1569,6 +1567,8 @@ static jl_taggedvalue_t **gc_sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t **allo int has_young = 0; int16_t prev_nold = 0; int pg_nfree = 0; + jl_taggedvalue_t *fl = NULL; + jl_taggedvalue_t **pfl = &fl; jl_taggedvalue_t **pfl_begin = NULL; uint8_t msk = 1; // mask for the age bit in the current age byte while ((char*)v <= lim) { @@ -1585,7 +1585,7 @@ static jl_taggedvalue_t **gc_sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t **allo if (*ages & msk || bits == GC_OLD_MARKED) { // old enough // `!age && bits == GC_OLD_MARKED` is possible for // non-first-class objects like `jl_binding_t` - if (sweep_full || bits == GC_MARKED) { + if (current_sweep_full || bits == GC_MARKED) { bits = v->bits.gc = GC_OLD; // promote } prev_nold++; @@ -1620,7 +1620,7 @@ static jl_taggedvalue_t **gc_sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t **allo } pg->nfree = pg_nfree; - if (sweep_full) { + if (current_sweep_full) { pg->nold = 0; pg->prev_nold = prev_nold; } @@ -1629,32 +1629,32 @@ static jl_taggedvalue_t **gc_sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t **allo done: if (re_use_page) { - push_page_metadata_back(allocd, pg); - } - else if (freed_lazily) { - gc_alloc_map_set(pg->data, GC_PAGE_LAZILY_FREED); - push_page_metadata_back(lazily_freed, pg); + push_lf_back(allocd, pg); } else { - jl_gc_free_page(pg); - push_lf_page_metadata_back(&global_page_pool_freed, pg); + gc_alloc_map_set(pg->data, GC_PAGE_LAZILY_FREED); + if (keep_as_local_buffer) { + push_lf_back(buffered, pg); + } + else { + push_lf_back(&global_page_pool_lazily_freed, pg); + } } gc_time_count_page(freedall, pg_skpd); - gc_num.freed += (nfree - old_nfree) * osize; - pool_live_bytes += GC_PAGE_SZ - GC_PAGE_OFFSET - nfree * osize; - return pfl; + jl_atomic_fetch_add((_Atomic(int64_t) *)&pool_live_bytes, GC_PAGE_SZ - GC_PAGE_OFFSET - nfree * osize); + jl_atomic_fetch_add((_Atomic(int64_t) *)&gc_num.freed, (nfree - old_nfree) * osize); } // the actual sweeping over all allocated pages in a memory pool -STATIC_INLINE void gc_sweep_pool_page(jl_taggedvalue_t ***pfl, jl_gc_pagemeta_t **allocd, - jl_gc_pagemeta_t **lazily_freed, jl_gc_pagemeta_t *pg, int sweep_full) JL_NOTSAFEPOINT +STATIC_INLINE void gc_sweep_pool_page(jl_gc_page_stack_t *allocd, jl_gc_page_stack_t *lazily_freed, + jl_gc_pagemeta_t *pg) JL_NOTSAFEPOINT { int p_n = pg->pool_n; int t_n = pg->thread_n; jl_ptls_t ptls2 = gc_all_tls_states[t_n]; jl_gc_pool_t *p = &ptls2->heap.norm_pools[p_n]; int osize = pg->osize; - pfl[t_n * JL_GC_N_POOLS + p_n] = gc_sweep_page(p, allocd, lazily_freed, pg, pfl[t_n * JL_GC_N_POOLS + p_n], sweep_full, osize); + gc_sweep_page(p, allocd, lazily_freed, pg, osize); } // sweep over all memory that is being used and not in a pool @@ -1680,11 +1680,70 @@ static void gc_pool_sync_nfree(jl_gc_pagemeta_t *pg, jl_taggedvalue_t *last) JL_ pg->nfree = nfree; } +void gc_sweep_wake_all(void) +{ + uv_mutex_lock(&gc_threads_lock); + for (int i = gc_first_tid; i < gc_first_tid + jl_n_gcthreads; i++) { + jl_ptls_t ptls2 = gc_all_tls_states[i]; + jl_atomic_fetch_add(&ptls2->gc_sweeps_requested, 1); + } + uv_cond_broadcast(&gc_threads_cond); + uv_mutex_unlock(&gc_threads_lock); +} + +void gc_sweep_pool_parallel(void) +{ + jl_atomic_fetch_add(&gc_n_threads_sweeping, 1); + jl_gc_page_stack_t *allocd_scratch = jl_atomic_load(&gc_allocd_scratch); + if (allocd_scratch != NULL) { + while (1) { + int found_pg = 0; + for (int t_i = 0; t_i < gc_n_threads; t_i++) { + jl_ptls_t ptls2 = gc_all_tls_states[t_i]; + if (ptls2 == NULL) { + continue; + } + jl_gc_page_stack_t *allocd = &allocd_scratch[t_i]; + jl_gc_pagemeta_t *pg = pop_lf_back(&ptls2->page_metadata_allocd); + if (pg == NULL) { + continue; + } + gc_sweep_pool_page(allocd, &ptls2->page_metadata_buffered, pg); + found_pg = 1; + } + if (!found_pg) { + break; + } + } + } + jl_atomic_fetch_add(&gc_n_threads_sweeping, -1); +} + +void gc_sweep_wait_for_all(void) +{ + jl_atomic_store(&gc_allocd_scratch, NULL); + while (jl_atomic_load_relaxed(&gc_n_threads_sweeping) != 0) { + jl_cpu_pause(); + } +} + +void gc_free_pages(void) +{ + while (1) { + jl_gc_pagemeta_t *pg = pop_lf_back(&global_page_pool_lazily_freed); + if (pg == NULL) { + break; + } + jl_gc_free_page(pg); + push_lf_back(&global_page_pool_freed, pg); + } +} + // setup the data-structures for a sweep over all memory pools -static void gc_sweep_pool(int sweep_full) +static void gc_sweep_pool(void) { gc_time_pool_start(); - lazy_freed_pages = 0; + buffered_pages = 0; // For the benefit of the analyzer, which doesn't know that gc_n_threads // doesn't change over the course of this function @@ -1724,26 +1783,26 @@ static void gc_sweep_pool(int sweep_full) pg->has_young = 1; } } - jl_gc_pagemeta_t *pg = ptls2->page_metadata_lazily_freed; + jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->page_metadata_buffered.bottom); while (pg != NULL) { jl_gc_pagemeta_t *pg2 = pg->next; - lazy_freed_pages++; + buffered_pages++; pg = pg2; } } // the actual sweeping + jl_gc_page_stack_t *tmp = (jl_gc_page_stack_t *)alloca(n_threads * sizeof(jl_gc_page_stack_t)); + memset(tmp, 0, n_threads * sizeof(jl_gc_page_stack_t)); + jl_atomic_store(&gc_allocd_scratch, tmp); + gc_sweep_wake_all(); + gc_sweep_pool_parallel(); + gc_sweep_wait_for_all(); + for (int t_i = 0; t_i < n_threads; t_i++) { jl_ptls_t ptls2 = gc_all_tls_states[t_i]; if (ptls2 != NULL) { - jl_gc_pagemeta_t *allocd = NULL; - jl_gc_pagemeta_t *pg = ptls2->page_metadata_allocd; - while (pg != NULL) { - jl_gc_pagemeta_t *pg2 = pg->next; - gc_sweep_pool_page(pfl, &allocd, &ptls2->page_metadata_lazily_freed, pg, sweep_full); - pg = pg2; - } - ptls2->page_metadata_allocd = allocd; + ptls2->page_metadata_allocd = tmp[t_i]; for (int i = 0; i < JL_GC_N_POOLS; i++) { jl_gc_pool_t *p = &ptls2->heap.norm_pools[i]; p->newpages = NULL; @@ -1751,6 +1810,26 @@ static void gc_sweep_pool(int sweep_full) } } + // merge free lists + for (int t_i = 0; t_i < n_threads; t_i++) { + jl_ptls_t ptls2 = gc_all_tls_states[t_i]; + if (ptls2 == NULL) { + continue; + } + jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->page_metadata_allocd.bottom); + while (pg != NULL) { + jl_gc_pagemeta_t *pg2 = pg->next; + if (pg->fl_begin_offset != UINT16_MAX) { + char *cur_pg = pg->data; + jl_taggedvalue_t *fl_beg = (jl_taggedvalue_t*)(cur_pg + pg->fl_begin_offset); + jl_taggedvalue_t *fl_end = (jl_taggedvalue_t*)(cur_pg + pg->fl_end_offset); + *pfl[t_i * JL_GC_N_POOLS + pg->pool_n] = fl_beg; + pfl[t_i * JL_GC_N_POOLS + pg->pool_n] = &fl_end->next; + } + pg = pg2; + } + } + // null out terminal pointers of free lists for (int t_i = 0; t_i < n_threads; t_i++) { jl_ptls_t ptls2 = gc_all_tls_states[t_i]; @@ -1761,7 +1840,8 @@ static void gc_sweep_pool(int sweep_full) } } - gc_time_pool_end(sweep_full); + gc_free_pages(); + gc_time_pool_end(current_sweep_full); } static void gc_sweep_perm_alloc(void) @@ -3359,13 +3439,14 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) pool_live_bytes = 0; uint64_t start_sweep_time = jl_hrtime(); JL_PROBE_GC_SWEEP_BEGIN(sweep_full); + current_sweep_full = sweep_full; sweep_weak_refs(); sweep_stack_pools(); gc_sweep_foreign_objs(); gc_sweep_other(ptls, sweep_full); gc_scrub(); gc_verify_tags(); - gc_sweep_pool(sweep_full); + gc_sweep_pool(); if (sweep_full) gc_sweep_perm_alloc(); JL_PROBE_GC_SWEEP_END(); diff --git a/src/gc.h b/src/gc.h index 1bdd278321bdd..b654c77b9ab09 100644 --- a/src/gc.h +++ b/src/gc.h @@ -185,12 +185,9 @@ typedef struct _jl_gc_pagemeta_t { uint8_t *ages; } jl_gc_pagemeta_t; -typedef struct { - _Atomic(jl_gc_pagemeta_t *) page_metadata_back; -} jl_gc_global_page_pool_t; - -extern jl_gc_global_page_pool_t global_page_pool_clean; -extern jl_gc_global_page_pool_t global_page_pool_freed; +extern jl_gc_page_stack_t global_page_pool_lazily_freed; +extern jl_gc_page_stack_t global_page_pool_clean; +extern jl_gc_page_stack_t global_page_pool_freed; #define GC_BACKOFF_MIN 4 #define GC_BACKOFF_MAX 12 @@ -211,28 +208,29 @@ STATIC_INLINE void gc_backoff(int *i) JL_NOTSAFEPOINT // get away with just using a CAS and not implementing some ABA // prevention mechanism since once a node is popped from the // `jl_gc_global_page_pool_t`, it may only be pushed back to them -// in the sweeping phase, which is serial +// in the sweeping phase, which also doesn't push a node into the +// same stack after it's popped -STATIC_INLINE void push_lf_page_metadata_back(jl_gc_global_page_pool_t *pool, jl_gc_pagemeta_t *elt) JL_NOTSAFEPOINT +STATIC_INLINE void push_lf_back(jl_gc_page_stack_t *pool, jl_gc_pagemeta_t *elt) JL_NOTSAFEPOINT { while (1) { - jl_gc_pagemeta_t *old_back = jl_atomic_load_relaxed(&pool->page_metadata_back); + jl_gc_pagemeta_t *old_back = jl_atomic_load_relaxed(&pool->bottom); elt->next = old_back; - if (jl_atomic_cmpswap(&pool->page_metadata_back, &old_back, elt)) { + if (jl_atomic_cmpswap(&pool->bottom, &old_back, elt)) { break; } jl_cpu_pause(); } } -STATIC_INLINE jl_gc_pagemeta_t *pop_lf_page_metadata_back(jl_gc_global_page_pool_t *pool) JL_NOTSAFEPOINT +STATIC_INLINE jl_gc_pagemeta_t *pop_lf_back(jl_gc_page_stack_t *pool) JL_NOTSAFEPOINT { while (1) { - jl_gc_pagemeta_t *old_back = jl_atomic_load_relaxed(&pool->page_metadata_back); + jl_gc_pagemeta_t *old_back = jl_atomic_load_relaxed(&pool->bottom); if (old_back == NULL) { return NULL; } - if (jl_atomic_cmpswap(&pool->page_metadata_back, &old_back, old_back->next)) { + if (jl_atomic_cmpswap(&pool->bottom, &old_back, old_back->next)) { return old_back; } jl_cpu_pause(); @@ -381,7 +379,7 @@ extern jl_gc_num_t gc_num; extern bigval_t *big_objects_marked; extern arraylist_t finalizer_list_marked; extern arraylist_t to_finalize; -extern int64_t lazy_freed_pages; +extern int64_t buffered_pages; extern int gc_first_tid; extern int gc_n_threads; extern jl_ptls_t* gc_all_tls_states; @@ -445,12 +443,18 @@ STATIC_INLINE void gc_big_object_link(bigval_t *hdr, bigval_t **list) JL_NOTSAFE *list = hdr; } +extern uv_mutex_t gc_threads_lock; +extern uv_cond_t gc_threads_cond; extern _Atomic(int) gc_n_threads_marking; +extern _Atomic(int) gc_n_threads_sweeping; void gc_mark_queue_all_roots(jl_ptls_t ptls, jl_gc_markqueue_t *mq); void gc_mark_finlist_(jl_gc_markqueue_t *mq, jl_value_t **fl_begin, jl_value_t **fl_end) JL_NOTSAFEPOINT; void gc_mark_finlist(jl_gc_markqueue_t *mq, arraylist_t *list, size_t start) JL_NOTSAFEPOINT; void gc_mark_loop_serial_(jl_ptls_t ptls, jl_gc_markqueue_t *mq); void gc_mark_loop_serial(jl_ptls_t ptls); +void gc_mark_loop_parallel(jl_ptls_t ptls, int master); +void gc_sweep_pool_parallel(void); +void gc_free_pages(void); void sweep_stack_pools(void); void jl_gc_debug_init(void); diff --git a/src/julia_threads.h b/src/julia_threads.h index 4b39407c2b9e3..97a45c356656b 100644 --- a/src/julia_threads.h +++ b/src/julia_threads.h @@ -199,6 +199,10 @@ typedef struct { struct _jl_bt_element_t; struct _jl_gc_pagemeta_t; +typedef struct { + _Atomic(struct _jl_gc_pagemeta_t *) bottom; +} jl_gc_page_stack_t; + // This includes all the thread local states we care about for a thread. // Changes to TLS field types must be reflected in codegen. #define JL_MAX_BT_SIZE 80000 @@ -260,11 +264,12 @@ typedef struct _jl_tls_states_t { #endif jl_thread_t system_id; arraylist_t finalizers; - struct _jl_gc_pagemeta_t *page_metadata_allocd; - struct _jl_gc_pagemeta_t *page_metadata_lazily_freed; + jl_gc_page_stack_t page_metadata_allocd; + jl_gc_page_stack_t page_metadata_buffered; jl_gc_markqueue_t mark_queue; jl_gc_mark_cache_t gc_cache; arraylist_t sweep_objs; + _Atomic(int64_t) gc_sweeps_requested; // Saved exception for previous *external* API call or NULL if cleared. // Access via jl_exception_occurred(). struct _jl_value_t *previous_exception; diff --git a/src/partr.c b/src/partr.c index 0194cd6a4b31e..0ee7af228f45a 100644 --- a/src/partr.c +++ b/src/partr.c @@ -108,16 +108,16 @@ void jl_init_threadinginfra(void) void JL_NORETURN jl_finish_task(jl_task_t *t); -extern uv_mutex_t gc_threads_lock; -extern uv_cond_t gc_threads_cond; -extern _Atomic(int) gc_n_threads_marking; -extern void gc_mark_loop_parallel(jl_ptls_t ptls, int master); - -static int may_mark(void) JL_NOTSAFEPOINT +static inline int may_mark(void) JL_NOTSAFEPOINT { return (jl_atomic_load(&gc_n_threads_marking) > 0); } +static inline int may_sweep(jl_ptls_t ptls) JL_NOTSAFEPOINT +{ + return (jl_atomic_load(&ptls->gc_sweeps_requested) > 0); +} + // gc thread function void jl_gc_threadfun(void *arg) { @@ -135,11 +135,17 @@ void jl_gc_threadfun(void *arg) while (1) { uv_mutex_lock(&gc_threads_lock); - while (!may_mark()) { + while (!may_mark() && !may_sweep(ptls)) { uv_cond_wait(&gc_threads_cond, &gc_threads_lock); } uv_mutex_unlock(&gc_threads_lock); - gc_mark_loop_parallel(ptls, 0); + if (may_mark()) { + gc_mark_loop_parallel(ptls, 0); + } + if (may_sweep(ptls)) { // not an else! + gc_sweep_pool_parallel(); + jl_atomic_fetch_add(&ptls->gc_sweeps_requested, -1); + } } }