diff --git a/Doc/c-api/memory.rst b/Doc/c-api/memory.rst index 7041c15d23f..d44df88cce4 100644 --- a/Doc/c-api/memory.rst +++ b/Doc/c-api/memory.rst @@ -463,7 +463,7 @@ Customize Memory Allocators Get the memory block allocator of the specified domain. -.. c:function:: void PyMem_SetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator) +.. c:function:: void PyMem_SetAllocator(PyMemAllocatorDomain domain, const PyMemAllocatorEx *allocator) Set the memory block allocator of the specified domain. diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index 54a8ceb694a..3b902e9237a 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -12,13 +12,13 @@ extern "C" { /* GC information is stored BEFORE the object structure. */ typedef struct { + // Pointer to previous object in the list. + // Lowest three bits are used for flags documented later. + uintptr_t _gc_prev; + // Pointer to next object in the list. // 0 means the object is not tracked uintptr_t _gc_next; - - // Pointer to previous object in the list. - // Lowest two bits are used for flags documented later. - uintptr_t _gc_prev; } PyGC_Head; typedef struct { @@ -26,19 +26,29 @@ typedef struct { PyObject *_dict_or_values; PyObject *_weakref; } _PyGC_Preheader_UNUSED; +#define _PyGC_Head_UNUSED _PyGC_Preheader_UNUSED #define PyGC_Head_OFFSET (((Py_ssize_t)sizeof(PyObject *))*-4) +/* Bit 0 is set if the object is tracked by the GC */ +#define _PyGC_PREV_MASK_TRACKED (1) +/* Bit 1 is set when tp_finalize is called */ +#define _PyGC_PREV_MASK_FINALIZED (2) +/* Bit 2 is set when the object is not currently reachable */ +#define _PyGC_PREV_MASK_UNREACHABLE (4) +/* The (N-3) most significant bits contain the real address. */ +#define _PyGC_PREV_SHIFT (3) +#define _PyGC_PREV_MASK (((uintptr_t) -1) << _PyGC_PREV_SHIFT) + static inline PyGC_Head* _Py_AS_GC(PyObject *op) { char *mem = _Py_STATIC_CAST(char*, op); return _Py_STATIC_CAST(PyGC_Head*, mem + PyGC_Head_OFFSET); } -#define _PyGC_Head_UNUSED _PyGC_Preheader_UNUSED /* True if the object is currently tracked by the GC. */ static inline int _PyObject_GC_IS_TRACKED(PyObject *op) { PyGC_Head *gc = _Py_AS_GC(op); - return (gc->_gc_next != 0); + return (gc->_gc_prev & _PyGC_PREV_MASK_TRACKED) != 0; } #define _PyObject_GC_IS_TRACKED(op) _PyObject_GC_IS_TRACKED(_Py_CAST(PyObject*, op)) @@ -54,16 +64,6 @@ static inline int _PyObject_GC_MAY_BE_TRACKED(PyObject *obj) { return 1; } - -/* Bit flags for _gc_prev */ -/* Bit 0 is set when tp_finalize is called */ -#define _PyGC_PREV_MASK_FINALIZED (1) -/* Bit 1 is set when the object is in generation which is GCed currently. */ -#define _PyGC_PREV_MASK_COLLECTING (2) -/* The (N-2) most significant bits contain the real address. */ -#define _PyGC_PREV_SHIFT (2) -#define _PyGC_PREV_MASK (((uintptr_t) -1) << _PyGC_PREV_SHIFT) - // Lowest bit of _gc_next is used for flags only in GC. // But it is always 0 for normal code. static inline PyGC_Head* _PyGCHead_NEXT(PyGC_Head *gc) { @@ -175,8 +175,6 @@ struct _gc_runtime_state { /* Is automatic collection enabled? */ int enabled; int debug; - /* linked lists of container objects */ - PyGC_Head head; /* a permanent generation which won't be collected */ struct gc_generation_stats stats; /* true if we are currently running the collector */ @@ -211,12 +209,16 @@ struct _gc_runtime_state { extern void _PyGC_InitState(struct _gc_runtime_state *); extern Py_ssize_t _PyGC_CollectNoFail(PyThreadState *tstate); +extern void _PyGC_ResetHeap(void); static inline int _PyGC_ShouldCollect(struct _gc_runtime_state *gcstate) { Py_ssize_t live = _Py_atomic_load_ssize_relaxed(&gcstate->gc_live); - return live >= gcstate->gc_threshold && gcstate->enabled && gcstate->gc_threshold && !gcstate->collecting; + return (live >= gcstate->gc_threshold && + gcstate->enabled && + gcstate->gc_threshold && + !gcstate->collecting); } // Functions to clear types free lists diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index 3b0cccd8fde..64adf90e8cd 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -140,18 +140,7 @@ static inline void _PyObject_GC_TRACK( filename, lineno, __func__); PyGC_Head *gc = _Py_AS_GC(op); - _PyObject_ASSERT_FROM(op, - (gc->_gc_prev & _PyGC_PREV_MASK_COLLECTING) == 0, - "object is in generation which is garbage collected", - filename, lineno, __func__); - - PyInterpreterState *interp = _PyInterpreterState_GET(); - PyGC_Head *head = &interp->gc.head; - PyGC_Head *last = (PyGC_Head*)(head->_gc_prev); - _PyGCHead_SET_NEXT(last, gc); - _PyGCHead_SET_PREV(gc, last); - _PyGCHead_SET_NEXT(gc, head); - head->_gc_prev = (uintptr_t)gc; + gc->_gc_prev |= _PyGC_PREV_MASK_TRACKED; } /* Tell the GC to stop tracking this object. @@ -176,11 +165,16 @@ static inline void _PyObject_GC_UNTRACK( filename, lineno, __func__); PyGC_Head *gc = _Py_AS_GC(op); - PyGC_Head *prev = _PyGCHead_PREV(gc); - PyGC_Head *next = _PyGCHead_NEXT(gc); - _PyGCHead_SET_NEXT(prev, next); - _PyGCHead_SET_PREV(next, prev); - gc->_gc_next = 0; + if (gc->_gc_next != 0) { + PyGC_Head *prev = _PyGCHead_PREV(gc); + PyGC_Head *next = _PyGCHead_NEXT(gc); + + _PyGCHead_SET_NEXT(prev, next); + _PyGCHead_SET_PREV(next, prev); + + gc->_gc_next = 0; + } + gc->_gc_prev &= _PyGC_PREV_MASK_FINALIZED; } diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h index 35026932fc0..f0970b9b037 100644 --- a/Include/internal/pycore_pystate.h +++ b/Include/internal/pycore_pystate.h @@ -159,6 +159,11 @@ PyAPI_FUNC(void) _PyThreadState_Init( PyAPI_FUNC(void) _PyThreadState_DeleteExcept( _PyRuntimeState *runtime, PyThreadState *tstate); +PyAPI_FUNC(PyThreadState *) _PyThreadState_UnlinkExcept( + _PyRuntimeState *runtime, + PyThreadState *tstate, + int already_dead); +PyAPI_FUNC(void) _PyThreadState_DeleteGarbage(PyThreadState *garbage); static inline void _PyThreadState_Signal(PyThreadState *tstate, uintptr_t bit) diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py index f629321458d..ef38202f419 100644 --- a/Lib/test/test_exceptions.py +++ b/Lib/test/test_exceptions.py @@ -1658,7 +1658,12 @@ class C(): pass # Issue #30817: Abort in PyErr_PrintEx() when no memory. # Span a large range of tests as the CPython code always evolves with # changes that add or remove memory allocations. - for i in range(1, 20): + # + # TODO(sgross): this test is flaky with the allocator changes. If the + # memory error happens during GC (such as from Py_FinalizeEx), it may + # fail with an assertion error because the list gc.garbage can't be + # created. + for i in range(1, 15): rc, out, err = script_helper.assert_python_failure("-c", code % i) self.assertIn(rc, (1, 120)) self.assertIn(b'MemoryError', err) diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index d7df1ebb27f..9c2bd08f1c6 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -56,6 +56,7 @@ static PyObject * raiseTestError(const char* test_name, const char* msg) { PyErr_Format(TestError, "%s: %s", test_name, msg); + return NULL; } diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index 0f41bfbd708..e522970dd90 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -32,9 +32,13 @@ #include "pycore_pymem.h" #include "pycore_pystate.h" #include "pycore_refcnt.h" +#include "pycore_gc.h" #include "frameobject.h" /* for PyFrame_ClearFreeList */ #include "pydtrace.h" +#include "mimalloc.h" +#include "mimalloc-internal.h" + typedef struct _gc_runtime_state GCState; /*[clinic input] @@ -50,43 +54,22 @@ module gc #define GC_NEXT _PyGCHead_NEXT #define GC_PREV _PyGCHead_PREV -// update_refs() set this bit for all objects in current generation. -// subtract_refs() and move_unreachable() uses this to distinguish -// visited object is in GCing or not. -// -// move_unreachable() removes this flag from reachable objects. -// Only unreachable objects have this flag. -// -// No objects in interpreter have this flag after GC ends. -#define PREV_MASK_COLLECTING _PyGC_PREV_MASK_COLLECTING - -// Lowest bit of _gc_next is used for UNREACHABLE flag. -// -// This flag represents the object is in unreachable list in move_unreachable() -// -// Although this flag is used only in move_unreachable(), move_unreachable() -// doesn't clear this flag to skip unnecessary iteration. -// move_legacy_finalizers() removes this flag instead. -// Between them, unreachable list is not normal list and we can not use -// most gc_list_* functions for it. -#define NEXT_MASK_UNREACHABLE (1) - /* Get an object's GC head */ #define AS_GC(o) ((PyGC_Head *)(((char *)(o))+PyGC_Head_OFFSET)) /* Get the object given the GC head */ #define FROM_GC(g) ((PyObject *)(((char *)(g))-PyGC_Head_OFFSET)) -static inline int -gc_is_collecting(PyGC_Head *g) +static inline void +gc_set_unreachable(PyGC_Head *g) { - return (g->_gc_prev & PREV_MASK_COLLECTING) != 0; + g->_gc_prev |= _PyGC_PREV_MASK_UNREACHABLE; } -static inline void -gc_clear_collecting(PyGC_Head *g) +static inline int +gc_is_unreachable(PyGC_Head *g) { - g->_gc_prev &= ~PREV_MASK_COLLECTING; + return (g->_gc_prev & _PyGC_PREV_MASK_UNREACHABLE) != 0; } static inline Py_ssize_t @@ -105,8 +88,8 @@ gc_set_refs(PyGC_Head *g, Py_ssize_t refs) static inline void gc_reset_refs(PyGC_Head *g, Py_ssize_t refs) { - g->_gc_prev = (g->_gc_prev & _PyGC_PREV_MASK_FINALIZED) - | PREV_MASK_COLLECTING + assert(!gc_is_unreachable(g)); // if so we should clear it??? + g->_gc_prev = (g->_gc_prev & ~_PyGC_PREV_MASK) | ((uintptr_t)(refs) << _PyGC_PREV_SHIFT); } @@ -152,8 +135,6 @@ _PyGC_InitState(GCState *gcstate) if (scale_str) { (void)_Py_str_to_int(scale_str, &gcstate->gc_scale); } - - gc_list_init(&gcstate->head); } @@ -269,7 +250,11 @@ gc_list_remove(PyGC_Head *node) _PyGCHead_SET_NEXT(prev, next); _PyGCHead_SET_PREV(next, prev); - node->_gc_next = 0; /* object is not currently tracked */ + /* object is not currently tracked */ + // assert((node->_gc_prev & _PyGC_PREV_MASK_TRACKED) == 0); + + node->_gc_next = 0; + node->_gc_prev &= (_PyGC_PREV_MASK_TRACKED | _PyGC_PREV_MASK_FINALIZED); } /* Move `node` from the gc list it's currently in (which is not explicitly @@ -315,6 +300,19 @@ gc_list_merge(PyGC_Head *from, PyGC_Head *to) gc_list_init(from); } +static void +gc_list_clear(PyGC_Head *list) +{ + PyGC_Head *gc = GC_NEXT(list); + while (gc != list) { + PyGC_Head *next = GC_NEXT(gc); + gc->_gc_next = 0; + gc->_gc_prev &= ~_PyGC_PREV_MASK; + gc = next; + } + gc_list_init(list); +} + static Py_ssize_t gc_list_size(PyGC_Head *list) { @@ -326,39 +324,177 @@ gc_list_size(PyGC_Head *list) return n; } -/* Walk the list and mark all objects as non-collecting */ -static inline void -gc_list_clear_collecting(PyGC_Head *collectable) +/* Append objects in a GC list to a Python list. + * Return 0 if all OK, < 0 if error (out of memory for list) */ + +static Py_ssize_t +_Py_GC_REFCNT(PyObject *op) { - PyGC_Head *gc; - for (gc = GC_NEXT(collectable); gc != collectable; gc = GC_NEXT(gc)) { - gc_clear_collecting(gc); - } + Py_ssize_t local, shared; + int immortal; + + _PyRef_UnpackLocal(op->ob_ref_local, &local, &immortal); + _PyRef_UnpackShared(op->ob_ref_shared, &shared, NULL, NULL); + + assert(!immortal); + + return local + shared; } -/* Append objects in a GC list to a Python list. - * Return 0 if all OK, < 0 if error (out of memory for list) +typedef int (gc_visit_fn)(PyGC_Head* gc, void *arg); + +/* True if memory is allocated by the debug allocator. + * See obmalloc.c */ +static int using_debug_allocator; + +static int +visit_page(const mi_page_t* page, gc_visit_fn* visitor, void *arg) +{ + mi_segment_t* segment = _mi_page_segment(page); + size_t block_size = page->xblock_size; + uint8_t *data = _mi_page_start(segment, page, NULL); + // printf("visiting page %p of size %zu capacity %d debug=%d\n", page, block_size, (int)page->capacity, (int)using_debug_allocator); + for (int i = 0, end = page->capacity; i != end; i++) { + uint8_t *p = data + i * block_size; + if (using_debug_allocator) { + /* The debug allocator sticks two words before each allocation. + * When the allocation is active, the low bit of the first word + * is set. + */ + /* TODO(sgross): update and handle debug allocator in obmalloc.c */ + size_t *size_prefix = (size_t*)p; + if (!(*size_prefix & 1)) { + continue; + } + p += 2 * sizeof(size_t); + } + PyGC_Head *gc = (PyGC_Head *)p; + if (gc->_gc_prev & _PyGC_PREV_MASK_TRACKED) { + int err = (*visitor)(gc, arg); + if (err) { + return err; + } + } + } + return 0; +} + +static int +visit_segments(mi_segment_t* segment, gc_visit_fn* visitor, void *arg) +{ + while (segment) { + const mi_slice_t* end; + mi_slice_t* slice = mi_slices_start_iterate(segment, &end); + while (slice < end) { + if (slice->xblock_size > 0) { + mi_page_t* const page = mi_slice_to_page(slice); + if (page->tag == mi_heap_tag_gc) { + int err = visit_page(page, visitor, arg); + if (err) { + return err; + } + } + } + slice = slice + slice->slice_count; + } + segment = segment->abandoned_next; + } + return 0; +} + static int -append_objects(PyObject *py_list, PyGC_Head *gc_list) +visit_heap(mi_heap_t *heap, gc_visit_fn* visitor, void *arg) { - PyGC_Head *gc; - for (gc = GC_NEXT(gc_list); gc != gc_list; gc = GC_NEXT(gc)) { - PyObject *op = FROM_GC(gc); - if (op != py_list) { - if (PyList_Append(py_list, op)) { - return -1; /* exception */ + if (!heap || heap->visited || heap->page_count == 0) { + return 0; + } + + for (size_t i = 0; i <= MI_BIN_FULL; i++) { + const mi_page_queue_t *pq = &heap->pages[i]; + mi_page_t *page = pq->first; + while (page != NULL) { + assert(page->tag == mi_heap_tag_gc); + int err = visit_page(page, visitor, arg); + if (err) { + return err; + } + page = page->next; + } + } + + heap->visited = true; + return 0; +} + +static int +visit_heaps(gc_visit_fn* visitor, void *arg) +{ + int err = 0; + _PyRuntimeState *runtime = &_PyRuntime; + + HEAD_LOCK(runtime); + PyInterpreterState *head = _PyRuntime.interpreters.head; + for (PyInterpreterState *interp = head; interp != NULL; interp = interp->next) { + for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) { + int err = visit_heap(p->heaps[mi_heap_tag_gc], visitor, arg); + if (err) { + goto end; + } + } + } + + err = visit_segments(_mi_segment_abandoned(), visitor, arg); + if (err) { + goto end; + } + + err = visit_segments(_mi_segment_abandoned_visited(), visitor, arg); + if (err) { + goto end; + } + +end: + for (PyInterpreterState *interp = head; interp != NULL; interp = interp->next) { + for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) { + if (p->heaps[mi_heap_tag_gc]) { + p->heaps[mi_heap_tag_gc]->visited = false; } } } + + HEAD_UNLOCK(runtime); + return err; +} + +struct find_object_args { + PyObject *op; + int found; +}; + +static int +find_object_visitor(PyGC_Head* gc, void *arg) +{ + struct find_object_args *args = (struct find_object_args *)arg; + if (FROM_GC(gc) == args->op) { + args->found = 1; + } return 0; } +int +_PyGC_find_object(PyObject *op) +{ + struct find_object_args args; + args.op = op; + args.found = 0; + visit_heaps(find_object_visitor, &args); + return args.found; +} + // Constants for validate_list's flags argument. -enum flagstates {collecting_clear_unreachable_clear, - collecting_clear_unreachable_set, - collecting_set_unreachable_clear, - collecting_set_unreachable_set}; +enum flagstates {unreachable_clear, + unreachable_set}; #ifdef GC_DEBUG // validate_list checks list consistency. And it works as document @@ -373,103 +509,201 @@ enum flagstates {collecting_clear_unreachable_clear, static void validate_list(PyGC_Head *head, enum flagstates flags) { - assert((head->_gc_prev & PREV_MASK_COLLECTING) == 0); - assert((head->_gc_next & NEXT_MASK_UNREACHABLE) == 0); - uintptr_t prev_value = 0, next_value = 0; + assert(!gc_is_unreachable(head)); + uintptr_t prev_mask = 0, prev_value = 0; switch (flags) { - case collecting_clear_unreachable_clear: - break; - case collecting_set_unreachable_clear: - prev_value = PREV_MASK_COLLECTING; - break; - case collecting_clear_unreachable_set: - next_value = NEXT_MASK_UNREACHABLE; + case unreachable_clear: + prev_mask = _PyGC_PREV_MASK_UNREACHABLE; + prev_value = 0; break; - case collecting_set_unreachable_set: - prev_value = PREV_MASK_COLLECTING; - next_value = NEXT_MASK_UNREACHABLE; + case unreachable_set: + prev_mask = _PyGC_PREV_MASK_UNREACHABLE; + prev_value = _PyGC_PREV_MASK_UNREACHABLE; break; default: assert(! "bad internal flags argument"); } PyGC_Head *prev = head; PyGC_Head *gc = GC_NEXT(head); + int n = 0; while (gc != head) { PyGC_Head *trueprev = GC_PREV(gc); - PyGC_Head *truenext = (PyGC_Head *)(gc->_gc_next & ~NEXT_MASK_UNREACHABLE); + PyGC_Head *truenext = (PyGC_Head *)(gc->_gc_next); assert(truenext != NULL); assert(trueprev == prev); - assert((gc->_gc_prev & PREV_MASK_COLLECTING) == prev_value); - assert((gc->_gc_next & NEXT_MASK_UNREACHABLE) == next_value); + assert((gc->_gc_prev & prev_mask) == prev_value); + assert((gc->_gc_next & 3) == 0); prev = gc; gc = truenext; + n++; } assert(prev == GC_PREV(head)); } + +static int +validate_refcount_visitor(PyGC_Head* gc, void *arg) +{ + assert(_Py_GC_REFCNT(FROM_GC(gc)) > 0); + return 0; +} + +static void +validate_refcount(void) +{ + visit_heaps(validate_refcount_visitor, NULL); +} + +struct validate_tracked_args { + uintptr_t mask; + uintptr_t expected; +}; + +static int +validate_tracked_visitor(PyGC_Head* gc, void *void_arg) +{ + struct validate_tracked_args *arg = (struct validate_tracked_args*)void_arg; + assert((gc->_gc_prev & arg->mask) == arg->expected); + assert(gc->_gc_next == 0); + assert(_PyGCHead_PREV(gc) == NULL); + assert(_Py_GC_REFCNT(FROM_GC(gc)) > 0); + return 0; +} + +static void +validate_tracked_heap(uintptr_t mask, uintptr_t expected) +{ + struct validate_tracked_args args; + args.mask = mask; + args.expected = expected; + visit_heaps(validate_tracked_visitor, &args); +} #else #define validate_list(x, y) do{}while(0) +#define validate_refcount() do{}while(0) +#define validate_tracked_heap(x,y) do{}while(0) #endif -/*** end of list stuff ***/ +static int +reset_heap_visitor(PyGC_Head *gc, void *void_arg) +{ + gc->_gc_prev = 0; + return 0; +} -static Py_ssize_t -_Py_GC_REFCNT(PyObject *op) +void +_PyGC_ResetHeap(void) { - Py_ssize_t local, shared; - int immortal; + // NOTE: _PyGC_Initialize may be called multiple times. For example, + // _test_embed triggers multiple GC initializations, including some + // after _Py_Initialize failures. Since _Py_Initialize clears _PyRuntime + // we have no choice but to leak all PyObjects. + // TODO(sgross): should we drop mi_heap here instead? + visit_heaps(reset_heap_visitor, NULL); +} - _PyRef_UnpackLocal(op->ob_ref_local, &local, &immortal); - _PyRef_UnpackShared(op->ob_ref_shared, &shared, NULL, NULL); +struct update_refs_args { + PyGC_Head *list; + Py_ssize_t size; +}; - assert(!immortal); +static int +update_refs_visitor(PyGC_Head *gc, void *void_arg) +{ + struct update_refs_args *args = (struct update_refs_args *)void_arg; + PyGC_Head *list = args->list; + assert(gc->_gc_prev & _PyGC_PREV_MASK_TRACKED); + + Py_ssize_t refcount = _Py_GC_REFCNT(FROM_GC(gc)); + /* THIS IS NO LONGER TRUE: + * Python's cyclic gc should never see an incoming refcount + * of 0: if something decref'ed to 0, it should have been + * deallocated immediately at that time. + * Possible cause (if the assert triggers): a tp_dealloc + * routine left a gc-aware object tracked during its teardown + * phase, and did something-- or allowed something to happen -- + * that called back into Python. gc can trigger then, and may + * see the still-tracked dying object. Before this assert + * was added, such mistakes went on to allow gc to try to + * delete the object again. In a debug build, that caused + * a mysterious segfault, when _Py_ForgetReference tried + * to remove the object from the doubly-linked list of all + * objects a second time. In a release build, an actual + * double deallocation occurred, which leads to corruption + * of the allocator's internal bookkeeping pointers. That's + * so serious that maybe this should be a release-build + * check instead of an assert? + */ + _PyObject_ASSERT(FROM_GC(gc), refcount >= 0); - return local + shared; + gc_reset_refs(gc, refcount); + + PyGC_Head *prev = (PyGC_Head *)list->_gc_prev; + prev->_gc_next = (uintptr_t)gc; + gc->_gc_next = (uintptr_t)list; + list->_gc_prev = (uintptr_t)gc; + args->size++; + return 0; } /* Set all gc_refs = ob_refcnt. After this, gc_refs is > 0 and - * PREV_MASK_COLLECTING bit is set for all objects in containers. + * GC_COLLECTING_MASK bit is set for all objects in containers. + */ +static Py_ssize_t +update_refs(PyGC_Head *young) +{ + struct update_refs_args args; + args.list = young; + args.size = 0; + visit_heaps(update_refs_visitor, &args); + return args.size; +} + +/* A traversal callback for subtract_refs. */ +static int +visit_decref(PyObject *op, void *arg) +{ + if (_PyObject_IS_GC(op)) { + PyGC_Head *gc = AS_GC(op); + // We're only interested in gc_refs for tracked objects. + if (gc->_gc_prev & _PyGC_PREV_MASK_TRACKED) { + _PyObject_ASSERT(FROM_GC(gc), gc->_gc_next != 0); + gc_decref(gc); + } + } + return 0; +} + +/* Subtract internal references from gc_refs. After this, gc_refs is >= 0 + * for all objects in containers, and is GC_REACHABLE for all tracked gc + * objects not in containers. The ones with gc_refs > 0 are directly + * reachable from outside containers, and so can't be collected. */ static void -update_refs(PyGC_Head *containers) +subtract_refs(PyGC_Head *containers) { + traverseproc traverse; PyGC_Head *gc = GC_NEXT(containers); for (; gc != containers; gc = GC_NEXT(gc)) { - gc_reset_refs(gc, _Py_GC_REFCNT(FROM_GC(gc))); - /* Python's cyclic gc should never see an incoming refcount - * of 0: if something decref'ed to 0, it should have been - * deallocated immediately at that time. - * Possible cause (if the assert triggers): a tp_dealloc - * routine left a gc-aware object tracked during its teardown - * phase, and did something-- or allowed something to happen -- - * that called back into Python. gc can trigger then, and may - * see the still-tracked dying object. Before this assert - * was added, such mistakes went on to allow gc to try to - * delete the object again. In a debug build, that caused - * a mysterious segfault, when _Py_ForgetReference tried - * to remove the object from the doubly-linked list of all - * objects a second time. In a release build, an actual - * double deallocation occurred, which leads to corruption - * of the allocator's internal bookkeeping pointers. That's - * so serious that maybe this should be a release-build - * check instead of an assert? - */ - _PyObject_ASSERT(FROM_GC(gc), gc_get_refs(gc) != 0); + PyObject *op = FROM_GC(gc); + traverse = Py_TYPE(op)->tp_traverse; + (void) traverse(op, + (visitproc)visit_decref, + op); } } /* A traversal callback for subtract_refs. */ static int -visit_decref(PyObject *op, void *parent) +visit_decref_unreachable(PyObject *op, void *data) { - _PyObject_ASSERT(_PyObject_CAST(parent), !_PyObject_IsFreed(op)); - - if (_PyObject_IS_GC(op)) { + assert(op != NULL); + if (PyObject_IS_GC(op)) { PyGC_Head *gc = AS_GC(op); /* We're only interested in gc_refs for objects in the * generation being collected, which can be recognized * because only they have positive gc_refs. */ - if (gc_is_collecting(gc)) { + if (gc_is_unreachable(gc)) { gc_decref(gc); } } @@ -482,16 +716,15 @@ visit_decref(PyObject *op, void *parent) * reachable from outside containers, and so can't be collected. */ static void -subtract_refs(PyGC_Head *containers) +subtract_refs_unreachable(PyGC_Head *containers) { traverseproc traverse; PyGC_Head *gc = GC_NEXT(containers); for (; gc != containers; gc = GC_NEXT(gc)) { - PyObject *op = FROM_GC(gc); - traverse = Py_TYPE(op)->tp_traverse; - (void) traverse(op, - (visitproc)visit_decref, - op); + traverse = Py_TYPE(FROM_GC(gc))->tp_traverse; + (void) traverse(FROM_GC(gc), + (visitproc)visit_decref_unreachable, + NULL); } } @@ -506,18 +739,15 @@ visit_reachable(PyObject *op, PyGC_Head *reachable) PyGC_Head *gc = AS_GC(op); const Py_ssize_t gc_refs = gc_get_refs(gc); - // Ignore objects in other generation. - // This also skips objects "to the left" of the current position in - // move_unreachable's scan of the 'young' list - they've already been - // traversed, and no longer have the PREV_MASK_COLLECTING flag. - if (! gc_is_collecting(gc)) { + // Ignore untracked objects and objects in other generation. + // NOTE: there is a combination of bugs we have to beware of here. After + // a fork, we lost track of the heaps from other threads. They're not properly + // abandoned, so visit_heap doesn't see them. + if (gc->_gc_next == 0) { return 0; } - // It would be a logic error elsewhere if the collecting flag were set on - // an untracked object. - assert(gc->_gc_next != 0); - if (gc->_gc_next & NEXT_MASK_UNREACHABLE) { + if (gc_is_unreachable(gc)) { /* This had gc_refs = 0 when move_unreachable got * to it, but turns out it's reachable after all. * Move it back to move_unreachable's 'young' list, @@ -527,16 +757,18 @@ visit_reachable(PyObject *op, PyGC_Head *reachable) // Manually unlink gc from unreachable list because the list functions // don't work right in the presence of NEXT_MASK_UNREACHABLE flags. PyGC_Head *prev = GC_PREV(gc); - PyGC_Head *next = (PyGC_Head*)(gc->_gc_next & ~NEXT_MASK_UNREACHABLE); - _PyObject_ASSERT(FROM_GC(prev), - prev->_gc_next & NEXT_MASK_UNREACHABLE); - _PyObject_ASSERT(FROM_GC(next), - next->_gc_next & NEXT_MASK_UNREACHABLE); - prev->_gc_next = gc->_gc_next; // copy NEXT_MASK_UNREACHABLE + PyGC_Head *next = (PyGC_Head*)gc->_gc_next; + + // TODO: can't do these asserts because prev/next may be list head + //_PyObject_ASSERT(FROM_GC(prev), gc_is_unreachable(prev)); + //_PyObject_ASSERT(FROM_GC(next), gc_is_unreachable(next)); + + prev->_gc_next = gc->_gc_next; _PyGCHead_SET_PREV(next, prev); gc_list_append(gc, reachable); gc_set_refs(gc, 1); + gc->_gc_prev &= ~_PyGC_PREV_MASK_UNREACHABLE; } else if (gc_refs == 0) { /* This is in move_unreachable's 'young' list, but @@ -544,6 +776,7 @@ visit_reachable(PyObject *op, PyGC_Head *reachable) * we need to do is tell move_unreachable that it's * reachable. */ + assert((gc->_gc_next & ~3) != 0); gc_set_refs(gc, 1); } /* Else there's nothing to do. @@ -551,7 +784,7 @@ visit_reachable(PyObject *op, PyGC_Head *reachable) * list, and move_unreachable will eventually get to it. */ else { - _PyObject_ASSERT_WITH_MSG(op, gc_refs > 0, "refcount is too small"); + _PyObject_ASSERT_WITH_MSG(op, gc_get_refs(gc) > 0, "refcount is too small"); } return 0; } @@ -605,8 +838,6 @@ move_unreachable(PyGC_Head *young, PyGC_Head *unreachable) (void *)young); // relink gc_prev to prev element. _PyGCHead_SET_PREV(gc, prev); - // gc is not COLLECTING state after here. - gc_clear_collecting(gc); prev = gc; } else { @@ -629,17 +860,17 @@ move_unreachable(PyGC_Head *young, PyGC_Head *unreachable) // But this may pollute the unreachable list head's 'next' pointer // too. That's semantically senseless but expedient here - the // damage is repaired when this function ends. - last->_gc_next = (NEXT_MASK_UNREACHABLE | (uintptr_t)gc); + last->_gc_next = (uintptr_t)gc; _PyGCHead_SET_PREV(gc, last); - gc->_gc_next = (NEXT_MASK_UNREACHABLE | (uintptr_t)unreachable); + gc->_gc_next = (uintptr_t)unreachable; unreachable->_gc_prev = (uintptr_t)gc; + gc_set_unreachable(gc); + assert(last == _PyGCHead_PREV(gc)); } gc = (PyGC_Head*)prev->_gc_next; } // young->_gc_prev must be last element remained in the list. young->_gc_prev = (uintptr_t)prev; - // don't let the pollution of the list head's next pointer leak - unreachable->_gc_next &= ~NEXT_MASK_UNREACHABLE; } static void @@ -687,7 +918,6 @@ static void move_legacy_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers) { PyGC_Head *gc, *next; - assert((unreachable->_gc_next & NEXT_MASK_UNREACHABLE) == 0); /* March over unreachable. Move objects with finalizers into * `finalizers`. @@ -695,13 +925,12 @@ move_legacy_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers) for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) { PyObject *op = FROM_GC(gc); - _PyObject_ASSERT(op, gc->_gc_next & NEXT_MASK_UNREACHABLE); - gc->_gc_next &= ~NEXT_MASK_UNREACHABLE; + _PyObject_ASSERT(op, gc_is_unreachable(gc)); next = (PyGC_Head*)gc->_gc_next; if (has_legacy_finalizer(op)) { - gc_clear_collecting(gc); gc_list_move(gc, finalizers); + gc->_gc_prev &= ~_PyGC_PREV_MASK_UNREACHABLE; } } } @@ -710,16 +939,12 @@ static inline void clear_unreachable_mask(PyGC_Head *unreachable) { /* Check that the list head does not have the unreachable bit set */ - assert(((uintptr_t)unreachable & NEXT_MASK_UNREACHABLE) == 0); - PyGC_Head *gc, *next; - assert((unreachable->_gc_next & NEXT_MASK_UNREACHABLE) == 0); for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) { - _PyObject_ASSERT((PyObject*)FROM_GC(gc), gc->_gc_next & NEXT_MASK_UNREACHABLE); - gc->_gc_next &= ~NEXT_MASK_UNREACHABLE; + gc->_gc_prev &= ~_PyGC_PREV_MASK_UNREACHABLE; next = (PyGC_Head*)gc->_gc_next; } - validate_list(unreachable, collecting_set_unreachable_clear); + // validate_list(unreachable, unreachable_clear); } /* A traversal callback for move_legacy_finalizer_reachable. */ @@ -728,9 +953,9 @@ visit_move(PyObject *op, PyGC_Head *tolist) { if (_PyObject_IS_GC(op)) { PyGC_Head *gc = AS_GC(op); - if (gc_is_collecting(gc)) { + if (gc_is_unreachable(gc)) { gc_list_move(gc, tolist); - gc_clear_collecting(gc); + gc->_gc_prev &= ~_PyGC_PREV_MASK_UNREACHABLE; } } return 0; @@ -765,11 +990,10 @@ move_legacy_finalizer_reachable(PyGC_Head *finalizers) * no object in `unreachable` is weakly referenced anymore. */ static int -handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) +handle_weakrefs(PyGC_Head *unreachable) { PyGC_Head *gc; PyObject *op; /* generally FROM_GC(gc) */ - PyWeakReference *wr; /* generally a cast of op */ PyGC_Head wrcb_to_call; /* weakrefs with callbacks to call */ PyGC_Head *next; int num_freed = 0; @@ -815,10 +1039,7 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) */ wrlist = _PyObject_GET_WEAKREFS_LISTPTR_FROM_OFFSET(op); - /* `op` may have some weakrefs. March over the list, clear - * all the weakrefs, and move the weakrefs with callbacks - * that must be called into wrcb_to_call. - */ + PyWeakReference *wr; for (wr = *wrlist; wr != NULL; wr = *wrlist) { PyGC_Head *wrasgc; /* AS_GC(wr) */ @@ -829,6 +1050,7 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op); _PyWeakref_ClearRef(wr); _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None); + if (wr->wr_callback == NULL) { /* no callback */ continue; @@ -862,7 +1084,7 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) * to imagine how calling it later could create a problem for us. wr * is moved to wrcb_to_call in this case. */ - if (gc_is_collecting(AS_GC(wr))) { + if (gc_is_unreachable(AS_GC(wr))) { /* it should already have been cleared above */ assert(wr->wr_object == Py_None); continue; @@ -878,7 +1100,11 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) assert(wrasgc != next); /* wrasgc is reachable, but next isn't, so they can't be the same */ - gc_list_move(wrasgc, &wrcb_to_call); + assert(_PyGCHead_NEXT(wrasgc) == NULL); + assert(_PyGCHead_PREV(wrasgc) == NULL); + + gc_list_append(wrasgc, &wrcb_to_call); + // FIXME: need to set collecting???? } } @@ -892,7 +1118,7 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) gc = (PyGC_Head*)wrcb_to_call._gc_next; op = FROM_GC(gc); _PyObject_ASSERT(op, PyWeakref_Check(op)); - wr = (PyWeakReference *)op; + PyWeakReference *wr = (PyWeakReference *)op; callback = wr->wr_callback; _PyObject_ASSERT(op, callback != NULL); @@ -917,7 +1143,7 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) Py_DECREF(op); if (wrcb_to_call._gc_next == (uintptr_t)gc) { /* object is still alive -- move it */ - gc_list_move(gc, old); + gc_list_remove(gc); } else { ++num_freed; @@ -944,7 +1170,7 @@ debug_cycle(const char *msg, PyObject *op) static void handle_legacy_finalizers(PyThreadState *tstate, GCState *gcstate, - PyGC_Head *finalizers, PyGC_Head *old) + PyGC_Head *finalizers) { assert(!_PyErr_Occurred(tstate)); assert(gcstate->garbage != NULL); @@ -961,7 +1187,7 @@ handle_legacy_finalizers(PyThreadState *tstate, } } - gc_list_merge(finalizers, old); + gc_list_clear(finalizers); } static void @@ -1029,14 +1255,28 @@ finalize_garbage(PyThreadState *tstate, PyGC_Head *collectable) */ static void delete_garbage(PyThreadState *tstate, GCState *gcstate, - PyGC_Head *collectable, PyGC_Head *old) + PyGC_Head *collectable) { assert(!_PyErr_Occurred(tstate)); + for (PyGC_Head *gc = GC_NEXT(collectable); gc != collectable; gc = GC_NEXT(gc)) { + PyObject *op = FROM_GC(gc); + _PyObject_ASSERT_WITH_MSG(op, _Py_GC_REFCNT(op) > 0, + "refcount is too small"); + } + while (!gc_list_is_empty(collectable)) { PyGC_Head *gc = GC_NEXT(collectable); PyObject *op = FROM_GC(gc); + // if (_Py_GC_REFCNT(op) <= 0) { + // assert(0); + // printf("_Py_GC_REFCNT: %d\n", (int)_Py_GC_REFCNT(op)); + // printf("ob: %p local %d shared %d queued %d merged %d\n", + // op, op->ob_ref_local, op->ob_ref_shared.counter, + // op->ob_ref_shared.queued, op->ob_ref_shared.merged); + // } + _PyObject_ASSERT_WITH_MSG(op, _Py_GC_REFCNT(op) > 0, "refcount is too small"); @@ -1050,18 +1290,19 @@ delete_garbage(PyThreadState *tstate, GCState *gcstate, inquiry clear; if ((clear = Py_TYPE(op)->tp_clear) != NULL) { Py_INCREF(op); + // printf("clearing %p (op=%p)\n", gc, op); (void) clear(op); if (_PyErr_Occurred(tstate)) { _PyErr_WriteUnraisableMsg("in tp_clear of", (PyObject*)Py_TYPE(op)); } + // printf("refcnt after clear of %p = %d\n", gc, (int)_Py_GC_REFCNT(op)); Py_DECREF(op); } } if (GC_NEXT(collectable) == gc) { /* object is still alive, move it, it may die later */ - gc_clear_collecting(gc); - gc_list_move(gc, old); + gc_list_remove(gc); } } } @@ -1111,15 +1352,6 @@ by a call to 'move_legacy_finalizers'), the 'unreachable' list is not a normal list and we can not use most gc_list_* functions for it. */ static inline void deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) { - validate_list(base, collecting_clear_unreachable_clear); - /* Using ob_refcnt and gc_refs, calculate which objects in the - * container set are reachable from outside the set (i.e., have a - * refcount greater than 0 when all the references within the - * set are taken into account). - */ - update_refs(base); // gc_prev is used for gc_refs - subtract_refs(base); - /* Leave everything reachable from outside base in base, and move * everything else (in base) to unreachable. * @@ -1157,8 +1389,8 @@ deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) { */ gc_list_init(unreachable); move_unreachable(base, unreachable); // gc_prev is pointer again - validate_list(base, collecting_clear_unreachable_clear); - validate_list(unreachable, collecting_set_unreachable_set); + validate_list(base, unreachable_clear); + validate_list(unreachable, unreachable_set); } /* Handle objects that may have resurrected after a call to 'finalize_garbage', moving @@ -1175,22 +1407,28 @@ IMPORTANT: After a call to this function, the 'still_unreachable' set will have PREV_MARK_COLLECTING set, but the objects in this set are going to be removed so we can skip the expense of clearing the flag to avoid extra iteration. */ static inline void -handle_resurrected_objects(PyGC_Head *unreachable, PyGC_Head* still_unreachable, - PyGC_Head *old_generation) +handle_resurrected_objects(PyGC_Head *unreachable, PyGC_Head* still_unreachable) { - // Remove the PREV_MASK_COLLECTING from unreachable - // to prepare it for a new call to 'deduce_unreachable' - gc_list_clear_collecting(unreachable); + validate_list(unreachable, unreachable_set); + + // First reset the reference count for unreachable objects + PyGC_Head *gc; + for (gc = GC_NEXT(unreachable); gc != unreachable; gc = GC_NEXT(gc)) { + Py_ssize_t refcnt = _Py_GC_REFCNT(FROM_GC(gc)); + gc_set_refs(gc, refcnt); + _PyObject_ASSERT(FROM_GC(gc), refcnt > 0); + } + + subtract_refs_unreachable(unreachable); + clear_unreachable_mask(unreachable); // After the call to deduce_unreachable, the 'still_unreachable' set will // have the PREV_MARK_COLLECTING set, but the objects are going to be // removed so we can skip the expense of clearing the flag. PyGC_Head* resurrected = unreachable; deduce_unreachable(resurrected, still_unreachable); - clear_unreachable_mask(still_unreachable); - // Move the resurrected objects to the old generation for future collection. - gc_list_merge(resurrected, old_generation); + gc_list_clear(resurrected); } static void @@ -1213,8 +1451,7 @@ gc_collect_main(PyThreadState *tstate, int generation, { Py_ssize_t m = 0; /* # objects collected */ Py_ssize_t n = 0; /* # unreachable objects that couldn't be collected */ - PyGC_Head *young; /* the generation we are examining */ - PyGC_Head *old; /* next older generation */ + PyGC_Head young; /* the generation we are examining */ PyGC_Head unreachable; /* non-problematic unreachable trash */ PyGC_Head finalizers; /* objects with, & reachable from, __del__ */ PyGC_Head *gc; @@ -1225,12 +1462,14 @@ gc_collect_main(PyThreadState *tstate, int generation, // or after _PyGC_Fini() assert(gcstate->garbage != NULL); assert(!_PyErr_Occurred(tstate)); + using_debug_allocator = _PyMem_DebugEnabled(); if (gcstate->debug & DEBUG_STATS) { + // FIXME: how many tracked objects? PySys_WriteStderr("gc: collecting heap...\n"); PySys_FormatStderr( "gc: tracked objects: %"PY_FORMAT_SIZE_T"d\n", - gc_list_size(&gcstate->head)); + 9000); t1 = _PyTime_GetMonotonicClock(); } @@ -1240,17 +1479,18 @@ gc_collect_main(PyThreadState *tstate, int generation, /* explicitly merge refcnts all queued objects */ merge_queued_objects(); - /* handy references */ - young = old = &gcstate->head; - validate_list(old, collecting_clear_unreachable_clear); + validate_tracked_heap(_PyGC_PREV_MASK_UNREACHABLE, 0); - deduce_unreachable(young, &unreachable); - - untrack_tuples(young); - untrack_dicts(young); + gc_list_init(&young); + update_refs(&young); + subtract_refs(&young); + deduce_unreachable(&young, &unreachable); + untrack_tuples(&young); + untrack_dicts(&young); gcstate->long_lived_pending = 0; - gcstate->long_lived_total = gc_list_size(young); + gcstate->long_lived_total = gc_list_size(&young); + gc_list_clear(&young); /* All objects in unreachable are trash, but objects reachable from * legacy finalizers (e.g. tp_del) can't safely be deleted. @@ -1259,14 +1499,15 @@ gc_collect_main(PyThreadState *tstate, int generation, // NEXT_MASK_UNREACHABLE is cleared here. // After move_legacy_finalizers(), unreachable is normal list. move_legacy_finalizers(&unreachable, &finalizers); + // printf("finalizers size %d\n", (int)gc_list_size( &finalizers)); /* finalizers contains the unreachable objects with a legacy finalizer; * unreachable objects reachable *from* those are also uncollectable, * and we move those into the finalizers list too. */ move_legacy_finalizer_reachable(&finalizers); - validate_list(&finalizers, collecting_clear_unreachable_clear); - validate_list(&unreachable, collecting_set_unreachable_clear); + validate_list(&finalizers, unreachable_clear); + validate_list(&unreachable, unreachable_set); /* Print debugging information. */ if (gcstate->debug & DEBUG_COLLECTABLE) { @@ -1276,26 +1517,29 @@ gc_collect_main(PyThreadState *tstate, int generation, } /* Clear weakrefs and invoke callbacks as necessary. */ - m += handle_weakrefs(&unreachable, old); + m += handle_weakrefs(&unreachable); - validate_list(old, collecting_clear_unreachable_clear); - validate_list(&unreachable, collecting_set_unreachable_clear); + validate_list(&unreachable, unreachable_set); /* Call tp_finalize on objects which have one. */ finalize_garbage(tstate, &unreachable); + validate_refcount(); + /* Handle any objects that may have resurrected after the call * to 'finalize_garbage' and continue the collection with the * objects that are still unreachable */ PyGC_Head final_unreachable; - handle_resurrected_objects(&unreachable, &final_unreachable, old); + handle_resurrected_objects(&unreachable, &final_unreachable); /* Call tp_clear on objects in the final_unreachable set. This will cause * the reference cycles to be broken. It may also cause some objects * in finalizers to be freed. */ m += gc_list_size(&final_unreachable); - delete_garbage(tstate, gcstate, &final_unreachable, old); + delete_garbage(tstate, gcstate, &final_unreachable); + + validate_refcount(); /* Collect statistics on uncollectable objects found and print * debugging information. */ @@ -1315,8 +1559,7 @@ gc_collect_main(PyThreadState *tstate, int generation, * reachable list of garbage. The programmer has to deal with * this if they insist on creating this type of structure. */ - handle_legacy_finalizers(tstate, gcstate, &finalizers, old); - validate_list(old, collecting_clear_unreachable_clear); + handle_legacy_finalizers(tstate, gcstate, &finalizers); /* Clear free list only during the collection of the highest * generation */ @@ -1352,6 +1595,7 @@ gc_collect_main(PyThreadState *tstate, int generation, PyDTrace_GC_DONE(n + m); } + validate_tracked_heap(_PyGC_PREV_MASK_UNREACHABLE, 0); assert(!_PyErr_Occurred(tstate)); return n + m; } @@ -1603,23 +1847,29 @@ referrersvisit(PyObject* obj, PyObject *objs) return 0; } +struct gc_referrers_arg { + PyObject *objs; + PyObject *resultlist; +}; + static int -gc_referrers_for(PyObject *objs, PyGC_Head *list, PyObject *resultlist) +gc_referrers_visitor(PyGC_Head *gc, void *void_arg) { - PyGC_Head *gc; - PyObject *obj; - traverseproc traverse; - for (gc = GC_NEXT(list); gc != list; gc = GC_NEXT(gc)) { - obj = FROM_GC(gc); - traverse = Py_TYPE(obj)->tp_traverse; - if (obj == objs || obj == resultlist) - continue; - if (traverse(obj, (visitproc)referrersvisit, objs)) { - if (PyList_Append(resultlist, obj) < 0) - return 0; /* error */ + struct gc_referrers_arg *arg = (struct gc_referrers_arg*)void_arg; + PyObject *objs = arg->objs; + PyObject *resultlist = arg->resultlist; + + PyObject *obj = FROM_GC(gc); + traverseproc traverse = Py_TYPE(obj)->tp_traverse; + if (obj == objs || obj == resultlist) { + return 0; + } + if (traverse(obj, (visitproc)referrersvisit, objs)) { + if (PyList_Append(resultlist, obj) < 0) { + return -1; /* error */ } } - return 1; /* no error */ + return 0; } PyDoc_STRVAR(gc_get_referrers__doc__, @@ -1638,8 +1888,10 @@ gc_get_referrers(PyObject *self, PyObject *args) return NULL; } - GCState *gcstate = get_gc_state(); - if (!(gc_referrers_for(args, &gcstate->head, result))) { + struct gc_referrers_arg arg; + arg.objs = args; + arg.resultlist = result; + if (visit_heaps(gc_referrers_visitor, &arg) < 0) { Py_DECREF(result); return NULL; } @@ -1686,6 +1938,28 @@ gc_get_referents(PyObject *self, PyObject *args) return result; } +struct gc_get_objects_arg { + PyObject *py_list; + Py_ssize_t generation; +}; + +static int +gc_get_objects_visitor(PyGC_Head *gc, void *void_arg) +{ + PyObject *op = FROM_GC(gc); + + struct gc_get_objects_arg *arg = (struct gc_get_objects_arg*)void_arg; + PyObject *py_list = arg->py_list; + + if (op == py_list) { + return 0; + } + if (PyList_Append(py_list, op)) { + return -1; + } + return 0; +} + /*[clinic input] gc.get_objects generation: Py_ssize_t(accept={int, NoneType}, c_default="-1") = None @@ -1701,9 +1975,7 @@ static PyObject * gc_get_objects_impl(PyObject *module, Py_ssize_t generation) /*[clinic end generated code: output=48b35fea4ba6cb0e input=ef7da9df9806754c]*/ { - PyThreadState *tstate = _PyThreadState_GET(); PyObject* result; - GCState *gcstate = &tstate->interp->gc; if (PySys_Audit("gc.get_objects", "n", generation) < 0) { return NULL; @@ -1714,24 +1986,25 @@ gc_get_objects_impl(PyObject *module, Py_ssize_t generation) return NULL; } - /* If generation is passed, we extract only that generation */ - if (generation != -1) { - if (generation >= NUM_GENERATIONS) { - _PyErr_Format(tstate, PyExc_ValueError, - "generation parameter must be less than the number of " - "available generations (%i)", - NUM_GENERATIONS); - goto error; - } + if (generation >= NUM_GENERATIONS) { + PyErr_Format(PyExc_ValueError, + "generation parameter must be less than the number of " + "available generations (%i)", + NUM_GENERATIONS); + goto error; + } - if (generation < 0) { - _PyErr_SetString(tstate, PyExc_ValueError, - "generation parameter cannot be negative"); - goto error; - } + /* If generation is passed, we extract only that generation */ + if (generation < -1) { + PyErr_SetString(PyExc_ValueError, + "generation parameter cannot be negative"); + goto error; } - if (append_objects(result, &gcstate->head)) { + struct gc_get_objects_arg arg; + arg.py_list = result; + arg.generation = generation + 1; + if (visit_heaps(gc_get_objects_visitor, &arg) < 0) { goto error; } @@ -2005,7 +2278,7 @@ PyGC_Collect(void) } Py_ssize_t n; - if (gcstate->collecting) { + if (gcstate->collecting || tstate->cant_stop_wont_stop) { /* already collecting, don't do anything */ n = 0; } @@ -2034,7 +2307,6 @@ _PyGC_CollectNoFail(PyThreadState *tstate) if (gcstate->collecting) { return 0; } - Py_ssize_t n; gcstate->collecting = 1; n = gc_collect_main(tstate, NUM_GENERATIONS - 1, NULL, NULL, 1); @@ -2222,7 +2494,8 @@ gc_alloc(size_t basicsize, size_t presize) return _PyErr_NoMemory(tstate); } size_t size = presize + basicsize; - char *mem = PyObject_Malloc(size); + PyMemAllocatorEx *a = &_PyRuntime.allocators.standard.gc; + char *mem = a->malloc(a->ctx, size); if (mem == NULL) { return _PyErr_NoMemory(tstate); } @@ -2274,8 +2547,9 @@ _PyObject_GC_Resize(PyVarObject *op, Py_ssize_t nitems) return (PyVarObject *)PyErr_NoMemory(); } + PyMemAllocatorEx *a = &_PyRuntime.allocators.standard.gc; char *mem = (char *)op - presize; - mem = PyObject_Realloc(mem, presize + basicsize); + mem = a->realloc(a->ctx, mem, presize + basicsize); if (mem == NULL) return (PyVarObject *)PyErr_NoMemory(); op = (PyVarObject *) (mem + presize); @@ -2300,7 +2574,8 @@ PyObject_GC_Del(void *op) } GCState *gcstate = get_gc_state(); gcstate->gc_live--; - PyObject_Free(((char *)op)-presize); + PyMemAllocatorEx *a = &_PyRuntime.allocators.standard.gc; + a->free(a->ctx, ((char *)op)-presize); } int diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 1723b9f4f91..9be9529fabd 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -624,6 +624,8 @@ PyOS_AfterFork_Child(void) goto fatal_error; } + PyThreadState *garbage = _PyThreadState_UnlinkExcept(runtime, tstate, 1); + status = _PyInterpreterState_DeleteExceptMain(runtime); if (_PyStatus_EXCEPTION(status)) { goto fatal_error; @@ -635,6 +637,10 @@ PyOS_AfterFork_Child(void) goto fatal_error; } + // Now that we're in a good state we can delete the dead thread states. + // This may call arbitrary Python code from destructors. + _PyThreadState_DeleteGarbage(garbage); + run_at_forkers(tstate->interp->after_forkers_child, 0); return; diff --git a/Objects/mimalloc/init.c b/Objects/mimalloc/init.c index 2a83ea42f04..45fd7be6081 100644 --- a/Objects/mimalloc/init.c +++ b/Objects/mimalloc/init.c @@ -149,7 +149,7 @@ mi_stats_t _mi_stats_main = { MI_STATS_NULL }; static int debug_offsets[MI_NUM_HEAPS] = { [mi_heap_tag_default] = 0, [mi_heap_tag_obj] = offsetof(PyObject, ob_type), - [mi_heap_tag_gc] = sizeof(PyGC_Head) + offsetof(PyObject, ob_type), + [mi_heap_tag_gc] = 2 * sizeof(PyObject *) + sizeof(PyGC_Head) + offsetof(PyObject, ob_type), [mi_heap_tag_list_array] = -1, [mi_heap_tag_dict_keys] = -1 }; diff --git a/Objects/mimalloc/page.c b/Objects/mimalloc/page.c index 48158be49aa..872e048595c 100644 --- a/Objects/mimalloc/page.c +++ b/Objects/mimalloc/page.c @@ -376,6 +376,10 @@ void _mi_page_unfull(mi_page_t* page) { if (!mi_page_is_in_full(page)) return; mi_heap_t* heap = mi_page_heap(page); + if (page->tag == mi_heap_tag_gc) { + PyThreadState *tstate = _PyThreadState_GET(); + mi_atomic_addi64_relaxed(&tstate->interp->gc.gc_live, -page->capacity); + } mi_page_queue_t* pqfull = &heap->pages[MI_BIN_FULL]; mi_page_set_in_full(page, false); // to get the right queue mi_page_queue_t* pq = mi_heap_page_queue_of(heap, page); @@ -391,6 +395,11 @@ static void mi_page_to_full(mi_page_t* page, mi_page_queue_t* pq) { if (mi_page_is_in_full(page)) return; mi_page_queue_enqueue_from(&mi_page_heap(page)->pages[MI_BIN_FULL], pq, page); _mi_page_free_collect(page,false); // try to collect right away in case another thread freed just before MI_USE_DELAYED_FREE was set + + if (page->tag == mi_heap_tag_gc) { + PyThreadState *tstate = _PyThreadState_GET(); + mi_atomic_addi64_relaxed(&tstate->interp->gc.gc_live, page->capacity); + } } diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index 0501902902b..177cfcbecb8 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -910,32 +910,24 @@ bumpserialno(void) # define PYMEM_DEBUG_EXTRA_BYTES 3 * SST #endif -/* Read sizeof(size_t) bytes at p as a big-endian size_t. */ -static size_t -read_size_t(const void *p) -{ - const uint8_t *q = (const uint8_t *)p; - size_t result = *q++; - int i; - - for (i = SST; --i > 0; ++q) - result = (result << 8) | *q; - return result; -} - -/* Write n as a big-endian size_t, MSB at address p, LSB at - * p + sizeof(size_t) - 1. +/* Write the size of a block to p. The size is stored + * as (n<<1)|1 so that the LSB of the first words of an + * allocated block is always set. */ static void -write_size_t(void *p, size_t n) +write_size_prefix(void *p, size_t n) { - uint8_t *q = (uint8_t *)p + SST - 1; - int i; + size_t v = (n << 1)|1; + memcpy(p, &v, sizeof(v)); +} - for (i = SST; --i >= 0; --q) { - *q = (uint8_t)(n & 0xff); - n >>= 8; - } +/* Reads the size prefix. */ +static size_t +read_size_prefix(const void *p) +{ + size_t value; + memcpy(&value, p, sizeof(value)); + return value >> 1; } /* Let S = sizeof(size_t). The debug malloc asks for 4 * S extra bytes and @@ -1009,7 +1001,7 @@ _PyMem_DebugRawAlloc(int use_calloc, void *ctx, size_t nbytes) #endif /* at p, write size (SST bytes), id (1 byte), pad (SST-1 bytes) */ - write_size_t(p, nbytes); + write_size_prefix(p, nbytes); p[SST] = (uint8_t)api->api_id; memset(p + SST + 1, PYMEM_FORBIDDENBYTE, SST-1); @@ -1021,7 +1013,7 @@ _PyMem_DebugRawAlloc(int use_calloc, void *ctx, size_t nbytes) tail = data + nbytes; memset(tail, PYMEM_FORBIDDENBYTE, SST); #ifdef PYMEM_DEBUG_SERIALNO - write_size_t(tail + SST, serialno); + write_size_prefix(tail + SST, serialno); #endif return data; @@ -1061,7 +1053,7 @@ _PyMem_DebugRawFree(void *ctx, void *p) size_t nbytes; _PyMem_DebugCheckAddress(__func__, api->api_id, p); - nbytes = read_size_t(q); + nbytes = read_size_prefix(q); nbytes += PYMEM_DEBUG_EXTRA_BYTES; memset(q, PYMEM_DEADBYTE, nbytes); api->alloc.free(api->alloc.ctx, q); @@ -1089,7 +1081,7 @@ _PyMem_DebugRawRealloc(void *ctx, void *p, size_t nbytes) data = (uint8_t *)p; head = data - 2*SST; - original_nbytes = read_size_t(head); + original_nbytes = read_size_prefix(head); if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) { /* integer overflow: can't represent total as a Py_ssize_t */ return NULL; @@ -1098,7 +1090,7 @@ _PyMem_DebugRawRealloc(void *ctx, void *p, size_t nbytes) tail = data + original_nbytes; #ifdef PYMEM_DEBUG_SERIALNO - size_t block_serialno = read_size_t(tail + SST); + size_t block_serialno = read_size_prefix(tail + SST); #endif /* Mark the header, the trailer, ERASED_SIZE bytes at the begin and ERASED_SIZE bytes at the end as dead and save the copy of erased bytes. @@ -1132,14 +1124,14 @@ _PyMem_DebugRawRealloc(void *ctx, void *p, size_t nbytes) } data = head + 2*SST; - write_size_t(head, nbytes); + write_size_prefix(head, nbytes); head[SST] = (uint8_t)api->api_id; memset(head + SST + 1, PYMEM_FORBIDDENBYTE, SST-1); tail = data + nbytes; memset(tail, PYMEM_FORBIDDENBYTE, SST); #ifdef PYMEM_DEBUG_SERIALNO - write_size_t(tail + SST, block_serialno); + write_size_prefix(tail + SST, block_serialno); #endif /* Restore saved bytes. */ @@ -1245,7 +1237,7 @@ _PyMem_DebugCheckAddress(const char *func, char api, const void *p) } } - nbytes = read_size_t(q - 2*SST); + nbytes = read_size_prefix(q - 2*SST); tail = q + nbytes; for (i = 0; i < SST; ++i) { if (tail[i] != PYMEM_FORBIDDENBYTE) { @@ -1274,7 +1266,7 @@ _PyObject_DebugDumpAddress(const void *p) id = (char)q[-SST]; fprintf(stderr, " API '%c'\n", id); - nbytes = read_size_t(q - 2*SST); + nbytes = read_size_prefix(q - 2*SST); fprintf(stderr, " %zu bytes originally requested\n", nbytes); /* In case this is nuts, check the leading pad bytes first. */ @@ -1330,7 +1322,7 @@ _PyObject_DebugDumpAddress(const void *p) } #ifdef PYMEM_DEBUG_SERIALNO - size_t serial = read_size_t(tail + SST); + size_t serial = read_size_prefix(tail + SST); fprintf(stderr, " The block was made by call #%zu to debug malloc/realloc.\n", serial); diff --git a/Objects/typeobject.c b/Objects/typeobject.c index c7033291bb5..9a6c824a3bd 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -1293,15 +1293,23 @@ _PyType_AllocNoTrack(PyTypeObject *type, Py_ssize_t nitems) /* note that we need to add one, for the sentinel */ const size_t presize = _PyType_PreHeaderSize(type); - char *alloc = PyObject_Malloc(size + presize); - if (alloc == NULL) { - return PyErr_NoMemory(); - } - memset(alloc, '\0', size + presize); - obj = (PyObject *)(alloc + presize); if (presize) { + PyMemAllocatorEx *a = &_PyRuntime.allocators.standard.gc; + char *alloc = a->malloc(a->ctx, size + presize); + if (alloc == NULL) { + return PyErr_NoMemory(); + } + memset(alloc, '\0', size + presize); + obj = (PyObject *)(alloc + presize); _PyObject_GC_Link(obj); } + else { + obj = PyObject_Malloc(size); + if (obj == NULL) { + return PyErr_NoMemory(); + } + memset(obj, '\0', size); + } if (type->tp_itemsize == 0) { _PyObject_Init(obj, type); diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index 2609c85d278..448a61f286c 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -534,9 +534,6 @@ _PyEval_ReInitThreads(PyThreadState *tstate) if (_PyThread_at_fork_reinit(&pending->lock) < 0) { return _PyStatus_ERR("Can't reinitialize pending calls lock"); } - - /* Destroy all threads except the current one */ - _PyThreadState_DeleteExcept(runtime, tstate); return _PyStatus_OK(); } #endif diff --git a/Python/pystate.c b/Python/pystate.c index 9bb22feb09e..af01cd132ee 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -20,6 +20,7 @@ #include "parking_lot.h" #include "mimalloc.h" +#include "mimalloc-internal.h" /* -------------------------------------------------------------------------- CAUTION @@ -87,6 +88,7 @@ init_runtime(_PyRuntimeState *runtime, runtime->open_code_userdata = open_code_userdata; runtime->audit_hook_head = audit_hook_head; + _PyGC_ResetHeap(); _PyEval_InitRuntimeState(&runtime->ceval); PyPreConfig_InitPythonConfig(&runtime->preconfig); @@ -888,8 +890,11 @@ void _PyThreadState_SetCurrent(PyThreadState *tstate) { tstate->fast_thread_id = _Py_ThreadId(); + mi_tld_t *tld = mi_heap_get_default()->tld; + assert(tld->status == MI_THREAD_ALIVE); + mi_atomic_add_acq_rel(&tld->refcount, 1); for (int tag = 0; tag < Py_NUM_HEAPS; tag++) { - tstate->heaps[tag] = mi_heap_get_tag(tag); + tstate->heaps[tag] = &tld->heaps[tag]; } _PyParkingLot_InitThread(); _Py_queue_create(tstate); @@ -1104,6 +1109,10 @@ tstate_delete_common(PyThreadState *tstate, _Py_qsbr_unregister(tstate_impl->qsbr); tstate_impl->qsbr = NULL; + if (tstate->heaps[0] != NULL) { + _mi_thread_abandon(tstate->heaps[0]->tld); + } + for (int tag = 0; tag < Py_NUM_HEAPS; tag++) { tstate->heaps[tag] = NULL; } @@ -1179,14 +1188,14 @@ PyThreadState_DeleteCurrent(void) /* - * Delete all thread states except the one passed as argument. + * Detaches all thread states except the one passed as argument. * Note that, if there is a current thread state, it *must* be the one * passed as argument. Also, this won't touch any other interpreters * than the current one, since we don't know which thread state should * be kept in those other interpreters. */ -void -_PyThreadState_DeleteExcept(_PyRuntimeState *runtime, PyThreadState *tstate) +PyThreadState * +_PyThreadState_UnlinkExcept(_PyRuntimeState *runtime, PyThreadState *tstate, int already_dead) { PyInterpreterState *interp = tstate->interp; @@ -1194,31 +1203,48 @@ _PyThreadState_DeleteExcept(_PyRuntimeState *runtime, PyThreadState *tstate) /* Remove all thread states, except tstate, from the linked list of thread states. This will allow calling PyThreadState_Clear() without holding the lock. */ - PyThreadState *list = interp->threads.head; - if (list == tstate) { - list = tstate->next; - } - if (tstate->prev) { + PyThreadState *garbage = interp->threads.head; + if (garbage == tstate) + garbage = tstate->next; + if (tstate->prev) tstate->prev->next = tstate->next; - } - if (tstate->next) { + if (tstate->next) tstate->next->prev = tstate->prev; - } tstate->prev = tstate->next = NULL; interp->threads.head = tstate; HEAD_UNLOCK(runtime); - /* Clear and deallocate all stale thread states. Even if this - executes Python code, we should be safe since it executes - in the current thread, not one of the stale threads. */ - PyThreadState *p, *next; - for (p = list; p; p = next) { + for (PyThreadState *p = garbage; p; p = p->next) { + if (p->heaps[0] != NULL) { + mi_tld_t *tld = p->heaps[0]->tld; + if (already_dead) { + assert(tld->status == 0); + tld->status = MI_THREAD_DEAD; + } + _mi_thread_abandon(tld); + } + } + + return garbage; +} + +void +_PyThreadState_DeleteGarbage(PyThreadState *garbage) +{ + PyThreadState *next; + for (PyThreadState *p = garbage; p; p = next) { next = p->next; PyThreadState_Clear(p); free_threadstate(p); } } +void +_PyThreadState_DeleteExcept(_PyRuntimeState *runtime, PyThreadState *tstate) +{ + PyThreadState *garbage = _PyThreadState_UnlinkExcept(runtime, tstate, 0); + _PyThreadState_DeleteGarbage(garbage); +} PyThreadState * _PyThreadState_UncheckedGet(void)