From 909b49f7649b8675d62f2e214feea9cda535ae4b Mon Sep 17 00:00:00 2001 From: Max Carrara Date: Mon, 6 May 2024 18:44:30 +0200 Subject: [PATCH] add experimental support for PEP 489 multi-phase module initialization This commit adds experimental but *almost* fully functional support for multi-phase module initialization as specified in PEP 489. With the exception of adding submodules to modules, all other functionality should be retained. In other words, unless submodules are used, multi-phase initialization is a complete drop-in replacement. Note that this commit serves as a demonstration & basis for further improvements only; many tests have therefore not been adapted correspondingly yet. Nevertheless, it runs. Signed-off-by: Max Carrara --- pyo3-macros-backend/src/module.rs | 69 +++++++++++- src/impl_/pymodule.rs | 176 ++++++++++++++++++------------ src/impl_/trampoline.rs | 6 +- src/instance.rs | 2 +- 4 files changed, 176 insertions(+), 77 deletions(-) diff --git a/pyo3-macros-backend/src/module.rs b/pyo3-macros-backend/src/module.rs index 1e764176550..5e1fce980c2 100644 --- a/pyo3-macros-backend/src/module.rs +++ b/pyo3-macros-backend/src/module.rs @@ -339,8 +339,7 @@ pub fn pymodule_module_impl( unsafe { impl_::ModuleDef::new( __PYO3_NAME, - #doc, - INITIALIZER + #doc ) } }}; @@ -385,6 +384,9 @@ pub fn pymodule_function_impl( let initialization = module_initialization(&name, ctx, quote! { MakeDef::make_def() }, false); + // Each generated `module_objects_init` function is exported as a separate symbol. + let module_objects_init_symbol = format!("__module_objects_init__{}", ident.unraw()); + // Module function called with optional Python<'_> marker as first arg, followed by the module. let mut module_args = Vec::new(); if function.sig.inputs.len() == 2 { @@ -396,6 +398,32 @@ pub fn pymodule_function_impl( Ok(quote! { #[doc(hidden)] #vis mod #ident { + /// Function used to add classes, functions, etc. to the module during + /// multi-phase initialization. + #[doc(hidden)] + #[export_name = #module_objects_init_symbol] + pub unsafe extern "C" fn __module_objects_init(module: *mut #pyo3_path::ffi::PyObject) -> ::std::ffi::c_int { + let module = unsafe { + let nonnull = ::std::ptr::NonNull::new(module).expect("'module' shouldn't be NULL"); + #pyo3_path::Py::<#pyo3_path::types::PyModule>::from_non_null(nonnull) + }; + + let res = unsafe { + #pyo3_path::Python::with_gil_unchecked(|py| { + let bound = module.bind(py); + MakeDef::do_init_multiphase(bound) + }) + }; + + // FIXME: Better error handling? + let _ = res.unwrap(); + + 0 + } + + #[doc(hidden)] + pub const __PYO3_INIT: *mut ::std::ffi::c_void = __module_objects_init as *mut ::std::ffi::c_void; + #initialization } @@ -405,17 +433,22 @@ pub fn pymodule_function_impl( // inside a function body) #[allow(unknown_lints, non_local_definitions)] impl #ident::MakeDef { + /// Helper function for `__module_objects_init`. Should probably be put + /// somewhere else. + #[doc(hidden)] + pub fn do_init_multiphase(module: &#pyo3_path::Bound<'_, #pyo3_path::types::PyModule>) -> #pyo3_path::PyResult<()> { + #ident(#(#module_args),*) + } + const fn make_def() -> #pyo3_path::impl_::pymodule::ModuleDef { fn __pyo3_pymodule(module: &#pyo3_path::Bound<'_, #pyo3_path::types::PyModule>) -> #pyo3_path::PyResult<()> { #ident(#(#module_args),*) } - const INITIALIZER: #pyo3_path::impl_::pymodule::ModuleInitializer = #pyo3_path::impl_::pymodule::ModuleInitializer(__pyo3_pymodule); unsafe { #pyo3_path::impl_::pymodule::ModuleDef::new( #ident::__PYO3_NAME, #doc, - INITIALIZER ) } } @@ -442,14 +475,40 @@ fn module_initialization( #[doc(hidden)] pub static _PYO3_DEF: #pyo3_path::impl_::pymodule::ModuleDef = #module_def; }; + + // TODO: figure out how to support submodules (for both fn pymodules and mod pymodules) if !is_submodule { result.extend(quote! { + #[doc(hidden)] + pub static _PYO3_SLOTS: &[#pyo3_path::impl_::pymodule::ModuleDefSlot] = &[ + #pyo3_path::impl_::pymodule::ModuleDefSlot(#pyo3_path::ffi::PyModuleDef_Slot { + slot: #pyo3_path::ffi::Py_mod_exec, + value: #pyo3_path::impl_::pymodule_state::module_state_init as *mut ::std::ffi::c_void, + }), + #pyo3_path::impl_::pymodule::ModuleDefSlot(#pyo3_path::ffi::PyModuleDef_Slot { + slot: #pyo3_path::ffi::Py_mod_exec, + value: __PYO3_INIT, + }), + #[cfg(Py_3_12)] + #pyo3_path::impl_::pymodule::ModuleDefSlot(#pyo3_path::ffi::PyModuleDef_Slot { + slot: #pyo3_path::ffi::Py_mod_multiple_interpreters, + value: #pyo3_path::ffi::Py_MOD_PER_INTERPRETER_GIL_SUPPORTED, + }), + #pyo3_path::impl_::pymodule::ModuleDefSlot(#pyo3_path::ffi::PyModuleDef_Slot { + slot: 0, + value: ::std::ptr::null_mut(), + }), + ]; + /// This autogenerated function is called by the python interpreter when importing /// the module. #[doc(hidden)] #[export_name = #pyinit_symbol] pub unsafe extern "C" fn __pyo3_init() -> *mut #pyo3_path::ffi::PyObject { - #pyo3_path::impl_::trampoline::module_init(|py| _PYO3_DEF.make_module(py)) + #pyo3_path::impl_::trampoline::module_init(|py| { + _PYO3_DEF.set_multiphase_items(_PYO3_SLOTS); + _PYO3_DEF.make_module(py); + }) } }); } diff --git a/src/impl_/pymodule.rs b/src/impl_/pymodule.rs index b05652bced8..8b778d79d06 100644 --- a/src/impl_/pymodule.rs +++ b/src/impl_/pymodule.rs @@ -1,6 +1,6 @@ //! Implementation details of `#[pymodule]` which need to be accessible from proc-macro generated code. -use std::{cell::UnsafeCell, ffi::CStr, marker::PhantomData}; +use std::{cell::UnsafeCell, marker::PhantomData}; #[cfg(all( not(any(PyPy, GraalPy)), @@ -26,11 +26,18 @@ use crate::{ Bound, Py, PyClass, PyMethodDef, PyResult, PyTypeInfo, Python, }; +use crate::impl_::pymodule_state as state; + +/// `Sync` wrapper of `ffi::PyModuleDef_Slot`. +#[allow(unused)] +pub struct ModuleDefSlot(pub ffi::PyModuleDef_Slot); + +unsafe impl Sync for ModuleDefSlot {} + /// `Sync` wrapper of `ffi::PyModuleDef`. pub struct ModuleDef { // wrapped in UnsafeCell so that Rust compiler treats this as interior mutability ffi_def: UnsafeCell, - initializer: ModuleInitializer, /// Interpreter ID where module was initialized (not applicable on PyPy). #[cfg(all( not(any(PyPy, GraalPy)), @@ -38,22 +45,20 @@ pub struct ModuleDef { not(all(windows, Py_LIMITED_API, not(Py_3_10))) ))] interpreter: AtomicI64, + // TODO: Figure out how to cache module with multi-phase init /// Initialized module object, cached to avoid reinitialization. + #[allow(unused)] module: GILOnceCell>, } -/// Wrapper to enable initializer to be used in const fns. -pub struct ModuleInitializer(pub for<'py> fn(&Bound<'py, PyModule>) -> PyResult<()>); - unsafe impl Sync for ModuleDef {} impl ModuleDef { /// Make new module definition with given module name. - pub const unsafe fn new( - name: &'static CStr, - doc: &'static CStr, - initializer: ModuleInitializer, - ) -> Self { + /// + /// # Safety + /// `name` and `doc` must be null-terminated strings. + pub const unsafe fn new(name: &'static str, doc: &'static str) -> Self { const INIT: ffi::PyModuleDef = ffi::PyModuleDef { m_base: ffi::PyModuleDef_HEAD_INIT, m_name: std::ptr::null(), @@ -67,14 +72,13 @@ impl ModuleDef { }; let ffi_def = UnsafeCell::new(ffi::PyModuleDef { - m_name: name.as_ptr(), - m_doc: doc.as_ptr(), + m_name: name.as_ptr() as *const _, + m_doc: doc.as_ptr() as *const _, ..INIT }); ModuleDef { ffi_def, - initializer, // -1 is never expected to be a valid interpreter ID #[cfg(all( not(any(PyPy, GraalPy)), @@ -85,8 +89,9 @@ impl ModuleDef { module: GILOnceCell::new(), } } + /// Builds a module using user given initializer. Used for [`#[pymodule]`][crate::pymodule]. - pub fn make_module(&'static self, py: Python<'_>) -> PyResult> { + pub fn make_module(&'static self, py: Python<'_>) -> PyResult<*mut ffi::PyModuleDef> { #[cfg(all(PyPy, not(Py_3_8)))] { use crate::types::any::PyAnyMethods; @@ -140,18 +145,32 @@ impl ModuleDef { } } } - self.module - .get_or_try_init(py, || { - let module = unsafe { - Py::::from_owned_ptr_or_err( - py, - ffi::PyModule_Create(self.ffi_def.get()), - )? - }; - self.initializer.0(module.bind(py))?; - Ok(module) - }) - .map(|py_module| py_module.clone_ref(py)) + + if (unsafe { *self.ffi_def.get() }).m_slots.is_null() { + return Err(PyImportError::new_err( + "'m_slots' of module definition is NULL", + )); + } + + let module_def_ptr = unsafe { ffi::PyModuleDef_Init(self.ffi_def.get()) }; + + if module_def_ptr.is_null() { + return Err(PyImportError::new_err("PyModuleDef_Init returned NULL")); + } + + Ok(module_def_ptr.cast()) + } + + pub fn set_multiphase_items(&'static self, slots: &'static [ModuleDefSlot]) { + let slots = slots as *const [ModuleDefSlot] as *mut ffi::PyModuleDef_Slot; + let ffi_def = self.ffi_def.get(); + unsafe { + (*ffi_def).m_size = std::mem::size_of::() as ffi::Py_ssize_t; + (*ffi_def).m_slots = slots; + (*ffi_def).m_traverse = Some(state::module_state_traverse); + (*ffi_def).m_clear = Some(state::module_state_clear); + (*ffi_def).m_free = Some(state::module_state_free); + }; } } @@ -203,8 +222,24 @@ impl PyAddToModule for PyMethodDef { /// For adding a module to a module. impl PyAddToModule for ModuleDef { - fn add_to_module(&'static self, module: &Bound<'_, PyModule>) -> PyResult<()> { - module.add_submodule(self.make_module(module.py())?.bind(module.py())) + fn add_to_module(&'static self, _module: &Bound<'_, PyModule>) -> PyResult<()> { + // FIXME: Support multi-phase initialization + // + // Ideas: + // * have the fully initialized module be passed along? + // * have a separate way to initialize submodules? + // 1. init module first with "generic" multi-phase initialization + // 2. *then* add functions etc. -- should be fine? + // 3. finally add it as submodule + // + // Leaving this here for future me: I'm 99% sure that submodules also need + // to be initialized using multi-phase initialization - why wouldn't they? + // This should technically be easy to solve using the new mod pymodule proc + // macro - but the fn macro case feel *meh*. + // + // After that's done, finally tackle static data via some neat internal + // API of ModuleState. + unimplemented!("Adding submodules to a module is not supported at the moment.") } } @@ -218,50 +253,39 @@ mod tests { use crate::{ ffi, + impl_::pymodule_state::module_state_init, types::{any::PyAnyMethods, module::PyModuleMethods, PyModule}, Bound, PyResult, Python, }; - use super::{ModuleDef, ModuleInitializer}; + use super::{ModuleDef, ModuleDefSlot}; #[test] fn module_init() { - static MODULE_DEF: ModuleDef = unsafe { - ModuleDef::new( - ffi::c_str!("test_module"), - ffi::c_str!("some doc"), - ModuleInitializer(|m| { - m.add("SOME_CONSTANT", 42)?; - Ok(()) - }), - ) - }; + static MODULE_DEF: ModuleDef = unsafe { ModuleDef::new("test_module\0", "some doc\0") }; + + static SLOTS: &[ModuleDefSlot] = &[ + ModuleDefSlot(ffi::PyModuleDef_Slot { + slot: ffi::Py_mod_exec, + value: module_state_init as *mut std::ffi::c_void, + }), + #[cfg(Py_3_12)] + ModuleDefSlot(ffi::PyModuleDef_Slot { + slot: ffi::Py_mod_multiple_interpreters, + value: ffi::Py_MOD_PER_INTERPRETER_GIL_SUPPORTED, + }), + ModuleDefSlot(ffi::PyModuleDef_Slot { + slot: 0, + value: std::ptr::null_mut(), + }), + ]; + + MODULE_DEF.set_multiphase_items(SLOTS); + Python::with_gil(|py| { - let module = MODULE_DEF.make_module(py).unwrap().into_bound(py); - assert_eq!( - module - .getattr("__name__") - .unwrap() - .extract::>() - .unwrap(), - "test_module", - ); - assert_eq!( - module - .getattr("__doc__") - .unwrap() - .extract::>() - .unwrap(), - "some doc", - ); - assert_eq!( - module - .getattr("SOME_CONSTANT") - .unwrap() - .extract::() - .unwrap(), - 42, - ); + let module_def = MODULE_DEF.make_module(py).unwrap(); + // FIXME: Use PyState_FindModule to retrieve module here? + unimplemented!("Test currently not implemented"); }) } @@ -269,8 +293,24 @@ mod tests { fn module_def_new() { // To get coverage for ModuleDef::new() need to create a non-static ModuleDef, however init // etc require static ModuleDef, so this test needs to be separated out. - static NAME: &CStr = ffi::c_str!("test_module"); - static DOC: &CStr = ffi::c_str!("some doc"); + static NAME: &str = "test_module\0"; + static DOC: &str = "some doc\0"; + + static SLOTS: &[ModuleDefSlot] = &[ + ModuleDefSlot(ffi::PyModuleDef_Slot { + slot: ffi::Py_mod_exec, + value: module_state_init as *mut std::ffi::c_void, + }), + #[cfg(Py_3_12)] + ModuleDefSlot(ffi::PyModuleDef_Slot { + slot: ffi::Py_mod_multiple_interpreters, + value: ffi::Py_MOD_PER_INTERPRETER_GIL_SUPPORTED, + }), + ModuleDefSlot(ffi::PyModuleDef_Slot { + slot: 0, + value: std::ptr::null_mut(), + }), + ]; static INIT_CALLED: AtomicBool = AtomicBool::new(false); @@ -281,12 +321,12 @@ mod tests { } unsafe { - let module_def: ModuleDef = ModuleDef::new(NAME, DOC, ModuleInitializer(init)); + let module_def: ModuleDef = ModuleDef::new(NAME, DOC); + module_def.set_multiphase_items(SLOTS); assert_eq!((*module_def.ffi_def.get()).m_name, NAME.as_ptr() as _); assert_eq!((*module_def.ffi_def.get()).m_doc, DOC.as_ptr() as _); Python::with_gil(|py| { - module_def.initializer.0(&py.import_bound("builtins").unwrap()).unwrap(); assert!(INIT_CALLED.load(Ordering::SeqCst)); }) } diff --git a/src/impl_/trampoline.rs b/src/impl_/trampoline.rs index f485258e5e5..0ba63680a74 100644 --- a/src/impl_/trampoline.rs +++ b/src/impl_/trampoline.rs @@ -12,14 +12,14 @@ use std::{ use crate::gil::GILGuard; use crate::{ callback::PyCallbackOutput, ffi, ffi_ptr_ext::FfiPtrExt, impl_::panic::PanicTrap, - methods::IPowModulo, panic::PanicException, types::PyModule, Py, PyResult, Python, + methods::IPowModulo, panic::PanicException, PyResult, Python, }; #[inline] pub unsafe fn module_init( - f: for<'py> unsafe fn(Python<'py>) -> PyResult>, + f: for<'py> unsafe fn(Python<'py>) -> PyResult<*mut ffi::PyModuleDef>, ) -> *mut ffi::PyObject { - trampoline(|py| f(py).map(|module| module.into_ptr())) + trampoline(|py| f(py).map(|module_def| module_def.cast())) } #[inline] diff --git a/src/instance.rs b/src/instance.rs index 3b8e2529a6e..e268af4de73 100644 --- a/src/instance.rs +++ b/src/instance.rs @@ -1770,7 +1770,7 @@ impl Py { /// /// # Safety /// `ptr` must point to a Python object of type T. - unsafe fn from_non_null(ptr: NonNull) -> Self { + pub unsafe fn from_non_null(ptr: NonNull) -> Self { Self(ptr, PhantomData) } }