From 1b2ea2e908167b2779234895ef845f25b430fa83 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Thu, 6 Oct 2022 16:20:01 -0700 Subject: [PATCH] GH-97002: Prevent `_PyInterpreterFrame`s from backing more than one `PyFrameObject` (GH-97996) (cherry picked from commit 21a2d9ff550977f2668e2cf1cc15793bf27fa109) Co-authored-by: Brandt Bucher --- Lib/test/test_frame.py | 65 +++++++++++++++++++ ...2-10-06-02-11-34.gh-issue-97002.Zvsk71.rst | 3 + Python/frame.c | 29 +++++++-- 3 files changed, 91 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-10-06-02-11-34.gh-issue-97002.Zvsk71.rst diff --git a/Lib/test/test_frame.py b/Lib/test/test_frame.py index 5dda2fbfac374c..4b86a60d2f4c36 100644 --- a/Lib/test/test_frame.py +++ b/Lib/test/test_frame.py @@ -1,3 +1,4 @@ +import gc import re import sys import textwrap @@ -261,5 +262,69 @@ def gen(): """) assert_python_ok("-c", code) + @support.cpython_only + def test_sneaky_frame_object(self): + + def trace(frame, event, arg): + """ + Don't actually do anything, just force a frame object to be created. + """ + + def callback(phase, info): + """ + Yo dawg, I heard you like frames, so I'm allocating a frame while + you're allocating a frame, so you can have a frame while you have a + frame! + """ + nonlocal sneaky_frame_object + sneaky_frame_object = sys._getframe().f_back + # We're done here: + gc.callbacks.remove(callback) + + def f(): + while True: + yield + + old_threshold = gc.get_threshold() + old_callbacks = gc.callbacks[:] + old_enabled = gc.isenabled() + old_trace = sys.gettrace() + try: + # Stop the GC for a second while we set things up: + gc.disable() + # Create a paused generator: + g = f() + next(g) + # Move all objects to the oldest generation, and tell the GC to run + # on the *very next* allocation: + gc.collect() + gc.set_threshold(1, 0, 0) + # Okay, so here's the nightmare scenario: + # - We're tracing the resumption of a generator, which creates a new + # frame object. + # - The allocation of this frame object triggers a collection + # *before* the frame object is actually created. + # - During the collection, we request the exact same frame object. + # This test does it with a GC callback, but in real code it would + # likely be a trace function, weakref callback, or finalizer. + # - The collection finishes, and the original frame object is + # created. We now have two frame objects fighting over ownership + # of the same interpreter frame! + sys.settrace(trace) + gc.callbacks.append(callback) + sneaky_frame_object = None + gc.enable() + next(g) + # g.gi_frame should be the the frame object from the callback (the + # one that was *requested* second, but *created* first): + self.assertIs(g.gi_frame, sneaky_frame_object) + finally: + gc.set_threshold(*old_threshold) + gc.callbacks[:] = old_callbacks + sys.settrace(old_trace) + if old_enabled: + gc.enable() + + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-10-06-02-11-34.gh-issue-97002.Zvsk71.rst b/Misc/NEWS.d/next/Core and Builtins/2022-10-06-02-11-34.gh-issue-97002.Zvsk71.rst new file mode 100644 index 00000000000000..1f577e02e1fd8a --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-10-06-02-11-34.gh-issue-97002.Zvsk71.rst @@ -0,0 +1,3 @@ +Fix an issue where several frame objects could be backed by the same +interpreter frame, possibly leading to corrupted memory and hard crashes of +the interpreter. diff --git a/Python/frame.c b/Python/frame.c index 9f58c20d4fa25d..d8f2f801f38c72 100644 --- a/Python/frame.c +++ b/Python/frame.c @@ -35,14 +35,31 @@ _PyFrame_MakeAndSetFrameObject(_PyInterpreterFrame *frame) Py_XDECREF(error_type); Py_XDECREF(error_value); Py_XDECREF(error_traceback); + return NULL; } - else { - assert(frame->owner != FRAME_OWNED_BY_FRAME_OBJECT); - assert(frame->owner != FRAME_CLEARED); - f->f_frame = frame; - frame->frame_obj = f; - PyErr_Restore(error_type, error_value, error_traceback); + PyErr_Restore(error_type, error_value, error_traceback); + if (frame->frame_obj) { + // GH-97002: How did we get into this horrible situation? Most likely, + // allocating f triggered a GC collection, which ran some code that + // *also* created the same frame... while we were in the middle of + // creating it! See test_sneaky_frame_object in test_frame.py for a + // concrete example. + // + // Regardless, just throw f away and use that frame instead, since it's + // already been exposed to user code. It's actually a bit tricky to do + // this, since we aren't backed by a real _PyInterpreterFrame anymore. + // Just pretend that we have an owned, cleared frame so frame_dealloc + // doesn't make the situation worse: + f->f_frame = (_PyInterpreterFrame *)f->_f_frame_data; + f->f_frame->owner = FRAME_CLEARED; + f->f_frame->frame_obj = f; + Py_DECREF(f); + return frame->frame_obj; } + assert(frame->owner != FRAME_OWNED_BY_FRAME_OBJECT); + assert(frame->owner != FRAME_CLEARED); + f->f_frame = frame; + frame->frame_obj = f; return f; }