Skip to content

Commit

Permalink
pythongh-115103: Implement delayed memory reclamation (QSBR)
Browse files Browse the repository at this point in the history
  • Loading branch information
colesbury committed Feb 7, 2024
1 parent fedbf77 commit cbfc9a5
Show file tree
Hide file tree
Showing 13 changed files with 485 additions and 0 deletions.
32 changes: 32 additions & 0 deletions Doc/license.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1095,3 +1095,35 @@ which is distributed under the MIT license::
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


Global Unbounded Sequences (GUS)
--------------------------------

The file :file:`Python/qsbr.c` is adapted from FreeBSD's "Global Unbounded
Sequences" safe memory reclamation scheme in
`subr_smr.c <https://github.com/freebsd/freebsd-src/blob/main/sys/kern/subr_smr.c>`_.
The file is distributed under the 2-Clause BSD License::

Copyright (c) 2019,2020 Jeffrey Roberson <jeff@FreeBSD.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice unmodified, this list of conditions, and the following
disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2 changes: 2 additions & 0 deletions Include/internal/pycore_interp.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ extern "C" {
#include "pycore_mimalloc.h" // struct _mimalloc_interp_state
#include "pycore_object_state.h" // struct _py_object_state
#include "pycore_obmalloc.h" // struct _obmalloc_state
#include "pycore_qsbr.h" // struct _qsbr_shared
#include "pycore_tstate.h" // _PyThreadStateImpl
#include "pycore_tuple.h" // struct _Py_tuple_state
#include "pycore_typeobject.h" // struct types_state
Expand Down Expand Up @@ -198,6 +199,7 @@ struct _is {
struct _warnings_runtime_state warnings;
struct atexit_state atexit;
struct _stoptheworld_state stoptheworld;
struct _qsbr_shared qsbr;

#if defined(Py_GIL_DISABLED)
struct _mimalloc_interp_state mimalloc;
Expand Down
117 changes: 117 additions & 0 deletions Include/internal/pycore_qsbr.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
#ifndef Py_INTERNAL_QSBR_H
#define Py_INTERNAL_QSBR_H

#include <stdbool.h>
#include <stdint.h>
#include "pycore_lock.h" // PyMutex

#ifdef __cplusplus
extern "C" {
#endif

#ifndef Py_BUILD_CORE
# error "this header requires Py_BUILD_CORE define"
#endif

struct _qsbr_shared;
struct _PyThreadStateImpl; // forward declare to avoid circular dependency

// Per-thread state
//
// Holds one thread's view of the QSBR sequence. `seq` is updated by
// _Py_qsbr_quiescent_state() with the value read from `shared->wr_seq`.
struct _qsbr_thread_state {
// Last observed write sequence (or 0 if detached)
uint64_t seq;

// Shared (per-interpreter) QSBR state
struct _qsbr_shared *shared;

// Thread state (or NULL)
PyThreadState *tstate;

// Used to defer advancing write sequence a fixed number of times
int deferrals;

// Is this thread state allocated?
bool allocated;
// Link to another thread state.
// NOTE(review): presumably the next entry on `freelist` in
// struct _qsbr_shared while this state is unused -- confirm in qsbr.c.
struct _qsbr_thread_state *freelist_next;
};

// Padding to avoid false sharing
//
// Wraps a thread state and pads it out to 64 bytes in total.
// NOTE(review): 64 is presumably the assumed cache-line size -- confirm.
// The padding length is computed as 64 - sizeof(struct _qsbr_thread_state),
// so that struct must stay strictly smaller than 64 bytes; at 64 bytes or
// more the array length becomes zero or negative.
struct _qsbr_pad {
struct _qsbr_thread_state qsbr;
char __padding[64 - sizeof(struct _qsbr_thread_state)];
};

// Per-interpreter state
//
// Embedded in the interpreter state as the `qsbr` member; the runtime
// initializer sets both `wr_seq` and `rd_seq` to 1.
struct _qsbr_shared {
// Always odd, incremented by two
// Read atomically by _Py_qsbr_shared_current().
uint64_t wr_seq;

// Minimum observed read sequence
uint64_t rd_seq;

// Array of QSBR thread states.
// Each element is a padded struct _qsbr_pad slot.
struct _qsbr_pad *array;
// NOTE(review): presumably the number of slots in `array` -- confirm in
// qsbr.c.
Py_ssize_t size;

// Freelist of unused _qsbr_thread_states (protected by mutex)
PyMutex mutex;
struct _qsbr_thread_state *freelist;
};

// Returns the current value of the interpreter-wide write sequence
// (`shared->wr_seq`).
static inline uint64_t
_Py_qsbr_shared_current(struct _qsbr_shared *shared)
{
    // The load needs at least acquire ordering.
    uint64_t current = _Py_atomic_load_uint64(&shared->wr_seq);
    return current;
}

// Reports a quiescent state for the thread that owns `qsbr`: reads the
// current shared write sequence and publishes it as this thread's last
// observed sequence (`qsbr->seq`).
static inline void
_Py_qsbr_quiescent_state(struct _qsbr_thread_state *qsbr)
{
    uint64_t seq = _Py_qsbr_shared_current(qsbr->shared);

    // The store must have at least release ordering. With a relaxed store,
    // this thread's earlier accesses to shared data could be reordered after
    // the publication of `seq`, and a thread polling `qsbr->seq` could then
    // conclude too early that those accesses have finished.
    // _Py_atomic_store_uint64() is sequentially consistent, which is stronger
    // than strictly required; a dedicated release store would be sufficient.
    _Py_atomic_store_uint64(&qsbr->seq, seq);
}

// Advance the write sequence and return the new goal.
extern uint64_t
_Py_qsbr_advance(struct _qsbr_shared *shared);

// Batches requests to advance the write sequence. This advances the write
// sequence every N calls. Returns the new goal.
extern uint64_t
_Py_qsbr_deferred_advance(struct _qsbr_thread_state *qsbr);

// Have the read sequences advanced to the given goal?
extern bool
_Py_qsbr_poll(struct _qsbr_thread_state *qsbr, uint64_t goal);

// Called when thread attaches to interpreter
extern void
_Py_qsbr_attach(struct _qsbr_thread_state *qsbr);

// Called when thread detaches from interpreter
extern void
_Py_qsbr_detach(struct _qsbr_thread_state *qsbr);

// Reserves (allocates) a QSBR state and returns its index
// Callers treat a negative return value as failure.
extern Py_ssize_t
_Py_qsbr_reserve(PyInterpreterState *interp);

// Associates a PyThreadState with the QSBR state at the given index
// `index` is the value previously returned by _Py_qsbr_reserve().
extern void
_Py_qsbr_register(struct _PyThreadStateImpl *tstate,
PyInterpreterState *interp, Py_ssize_t index);

// Disassociates a PyThreadState from the QSBR state and frees the QSBR state.
extern void
_Py_qsbr_unregister(struct _PyThreadStateImpl *tstate);

// Finalizes the interpreter's QSBR state. Called from
// PyInterpreterState_Delete() before the interpreter itself is freed.
// NOTE(review): presumably releases the `array` of thread states -- confirm
// in qsbr.c.
extern void
_Py_qsbr_fini(PyInterpreterState *interp);

// NOTE(review): presumably resets `shared` in a forked child so that only
// `qsbr` (the forking thread's state) remains in use -- confirm in qsbr.c.
extern void
_Py_qsbr_after_fork(struct _qsbr_shared *shared, struct _qsbr_thread_state *qsbr);

#ifdef __cplusplus
}
#endif
#endif /* !Py_INTERNAL_QSBR_H */
4 changes: 4 additions & 0 deletions Include/internal/pycore_runtime_init.h
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,10 @@ extern PyTypeObject _PyExc_MemoryError;
{ .threshold = 10, }, \
}, \
}, \
.qsbr = { \
.wr_seq = 1, \
.rd_seq = 1, \
}, \
.object_state = _py_object_state_INIT(INTERP), \
.dtoa = _dtoa_state_INIT(&(INTERP)), \
.dict_state = _dict_state_INIT, \
Expand Down
2 changes: 2 additions & 0 deletions Include/internal/pycore_tstate.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ extern "C" {

#include "pycore_freelist.h" // struct _Py_freelist_state
#include "pycore_mimalloc.h" // struct _mimalloc_thread_state
#include "pycore_qsbr.h" // struct _qsbr_thread_state


// Every PyThreadState is actually allocated as a _PyThreadStateImpl. The
Expand All @@ -20,6 +21,7 @@ typedef struct _PyThreadStateImpl {
PyThreadState base;

#ifdef Py_GIL_DISABLED
struct _qsbr_thread_state *qsbr;
struct _mimalloc_thread_state mimalloc;
struct _Py_freelist_state freelist_state;
#endif
Expand Down
2 changes: 2 additions & 0 deletions Makefile.pre.in
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,7 @@ PYTHON_OBJS= \
Python/pystate.o \
Python/pythonrun.o \
Python/pytime.o \
Python/qsbr.o \
Python/bootstrap_hash.o \
Python/specialize.o \
Python/structmember.o \
Expand Down Expand Up @@ -1158,6 +1159,7 @@ PYTHON_HEADERS= \
$(srcdir)/Include/internal/pycore_pystats.h \
$(srcdir)/Include/internal/pycore_pythonrun.h \
$(srcdir)/Include/internal/pycore_pythread.h \
$(srcdir)/Include/internal/pycore_qsbr.h \
$(srcdir)/Include/internal/pycore_range.h \
$(srcdir)/Include/internal/pycore_runtime.h \
$(srcdir)/Include/internal/pycore_runtime_init.h \
Expand Down
1 change: 1 addition & 0 deletions PCbuild/_freeze_module.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,7 @@
<ClCompile Include="..\Python\pythonrun.c" />
<ClCompile Include="..\Python\Python-tokenize.c" />
<ClCompile Include="..\Python\pytime.c" />
<ClCompile Include="..\Python\qsbr.c" />
<ClCompile Include="..\Python\specialize.c" />
<ClCompile Include="..\Python\structmember.c" />
<ClCompile Include="..\Python\suggestions.c" />
Expand Down
3 changes: 3 additions & 0 deletions PCbuild/_freeze_module.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,9 @@
<ClCompile Include="..\Python\pytime.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\Python\qsbr.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\Objects\rangeobject.c">
<Filter>Source Files</Filter>
</ClCompile>
Expand Down
2 changes: 2 additions & 0 deletions PCbuild/pythoncore.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,7 @@
<ClInclude Include="..\Include\internal\pycore_pystats.h" />
<ClInclude Include="..\Include\internal\pycore_pythonrun.h" />
<ClInclude Include="..\Include\internal\pycore_pythread.h" />
<ClInclude Include="..\Include\internal\pycore_qsbr.h" />
<ClInclude Include="..\Include\internal\pycore_range.h" />
<ClInclude Include="..\Include\internal\pycore_runtime.h" />
<ClInclude Include="..\Include\internal\pycore_runtime_init.h" />
Expand Down Expand Up @@ -611,6 +612,7 @@
<ClCompile Include="..\Python\pystrcmp.c" />
<ClCompile Include="..\Python\pystrhex.c" />
<ClCompile Include="..\Python\pystrtod.c" />
<ClCompile Include="..\Python\qsbr.c" />
<ClCompile Include="..\Python\dtoa.c" />
<ClCompile Include="..\Python\Python-ast.c" />
<ClCompile Include="..\Python\Python-tokenize.c" />
Expand Down
6 changes: 6 additions & 0 deletions PCbuild/pythoncore.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -747,6 +747,9 @@
<ClInclude Include="..\Include\internal\pycore_pythread.h">
<Filter>Include\internal</Filter>
</ClInclude>
<ClInclude Include="..\Include\internal\pycore_qsbr.h">
<Filter>Include\internal</Filter>
</ClInclude>
<ClInclude Include="..\Include\internal\pycore_range.h">
<Filter>Include\internal</Filter>
</ClInclude>
Expand Down Expand Up @@ -1412,6 +1415,9 @@
<ClCompile Include="..\Python\pystrtod.c">
<Filter>Python</Filter>
</ClCompile>
<ClCompile Include="..\Python\qsbr.c">
<Filter>Python</Filter>
</ClCompile>
<ClCompile Include="..\Python\dtoa.c">
<Filter>Python</Filter>
</ClCompile>
Expand Down
7 changes: 7 additions & 0 deletions Python/ceval_macros.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,12 @@
#define PRE_DISPATCH_GOTO() ((void)0)
#endif

/* Report a QSBR quiescent state for `tstate`.
 *
 * With Py_GIL_DISABLED this calls _Py_qsbr_quiescent_state() on the `qsbr`
 * member of the _PyThreadStateImpl that `tstate` points to. Otherwise it is
 * a no-op expression, so the macro is usable in statement and expression
 * context in both configurations.
 *
 * The argument is parenthesized so that the cast applies to the whole
 * expression passed in, not only to its first operand. */
#ifdef Py_GIL_DISABLED
#define QSBR_QUIESCENT_STATE(tstate) \
    _Py_qsbr_quiescent_state(((_PyThreadStateImpl *)(tstate))->qsbr)
#else
#define QSBR_QUIESCENT_STATE(tstate) ((void)0)
#endif


/* Do interpreter dispatch accounting for tracing and instrumentation */
#define DISPATCH() \
Expand Down Expand Up @@ -117,6 +123,7 @@

#define CHECK_EVAL_BREAKER() \
_Py_CHECK_EMSCRIPTEN_SIGNALS_PERIODICALLY(); \
QSBR_QUIESCENT_STATE(tstate); \
if (_Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker) & _PY_EVAL_EVENTS_MASK) { \
if (_Py_HandlePending(tstate) != 0) { \
GOTO_ERROR(error); \
Expand Down
27 changes: 27 additions & 0 deletions Python/pystate.c
Original file line number Diff line number Diff line change
Expand Up @@ -951,6 +951,8 @@ PyInterpreterState_Delete(PyInterpreterState *interp)
PyThread_free_lock(interp->id_mutex);
}

_Py_qsbr_fini(interp);

_PyObject_FiniState(interp);

free_interpreter(interp);
Expand Down Expand Up @@ -1372,6 +1374,14 @@ new_threadstate(PyInterpreterState *interp, int whence)
if (new_tstate == NULL) {
return NULL;
}
#ifdef Py_GIL_DISABLED
Py_ssize_t qsbr_idx = _Py_qsbr_reserve(interp);
if (qsbr_idx < 0) {
PyMem_RawFree(new_tstate);
return NULL;
}
#endif

/* We serialize concurrent creation to protect global state. */
HEAD_LOCK(runtime);

Expand All @@ -1398,6 +1408,9 @@ new_threadstate(PyInterpreterState *interp, int whence)
sizeof(*tstate));
}

#ifdef Py_GIL_DISABLED
_Py_qsbr_register(tstate, interp, qsbr_idx);
#endif
init_threadstate(tstate, interp, id, whence);
add_threadstate(interp, (PyThreadState *)tstate, old_head);

Expand Down Expand Up @@ -1609,6 +1622,10 @@ tstate_delete_common(PyThreadState *tstate)
}
HEAD_UNLOCK(runtime);

#ifdef Py_GIL_DISABLED
_Py_qsbr_unregister((_PyThreadStateImpl *)tstate);
#endif

// XXX Unbind in PyThreadState_Clear(), or earlier
// (and assert not-equal here)?
if (tstate->_status.bound_gilstate) {
Expand Down Expand Up @@ -1650,6 +1667,9 @@ void
_PyThreadState_DeleteCurrent(PyThreadState *tstate)
{
_Py_EnsureTstateNotNULL(tstate);
#ifdef Py_GIL_DISABLED
_Py_qsbr_detach(((_PyThreadStateImpl *)tstate)->qsbr);
#endif
tstate_set_detached(tstate);
tstate_delete_common(tstate);
current_fast_clear(tstate->interp->runtime);
Expand Down Expand Up @@ -1871,6 +1891,10 @@ _PyThreadState_Attach(PyThreadState *tstate)
tstate_wait_attach(tstate);
}

#ifdef Py_GIL_DISABLED
_Py_qsbr_attach(((_PyThreadStateImpl *)tstate)->qsbr);
#endif

// Resume previous critical section. This acquires the lock(s) from the
// top-most critical section.
if (tstate->critical_section != 0) {
Expand All @@ -1891,6 +1915,9 @@ detach_thread(PyThreadState *tstate, int detached_state)
if (tstate->critical_section != 0) {
_PyCriticalSection_SuspendAll(tstate);
}
#ifdef Py_GIL_DISABLED
_Py_qsbr_detach(((_PyThreadStateImpl *)tstate)->qsbr);
#endif
tstate_deactivate(tstate);
tstate_set_detached(tstate);
current_fast_clear(&_PyRuntime);
Expand Down
Loading

0 comments on commit cbfc9a5

Please sign in to comment.