From 7520781c6dd6cf22bf07a48d5406c71cd65a41b8 Mon Sep 17 00:00:00 2001 From: Timothy Elliott Date: Wed, 24 Apr 2013 21:18:02 -0700 Subject: [PATCH] Clean all occurrences of _private on GC when libxml-ruby is loaded. libxml-ruby uses the global libxml2 callback xmlDeregisterNodeDefault. This callback looks in the _private field for every deleted libxml2 node. If the _private field is set, it is treated as a Ruby VALUE ptr. The callback NULLs the Ruby object's data pointer and mark & free fn pointers. When Nokogiri wraps a libxml2 document it puts a nokogiriTuple ptr in the _private field. We can't let the libxml-ruby callback be invoked on a libxml2 document before NULLing the document _private field. When Nokogiri wraps other libxml2 nodes it puts VALUE ptrs in the _private field. This makes the libxml-ruby callback generally safe for these node types. There is a caveat - it is unsafe to access VALUE ptrs during GC sweep. This commit tests whether the xmlDeregisterNodeDefault callback is set during document cleanup. If set, we traverse the document and clean up all _private fields. The previous solution was to unset xmlDeregisterNodeDefault callback. However, this doesn't work in multithreaded environments. --- ext/nokogiri/xml_document.c | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/ext/nokogiri/xml_document.c b/ext/nokogiri/xml_document.c index 19bb876b822..d4275fc877e 100644 --- a/ext/nokogiri/xml_document.c +++ b/ext/nokogiri/xml_document.c @@ -17,13 +17,29 @@ static int dealloc_node_i(xmlNodePtr key, xmlNodePtr node, xmlDocPtr doc) return ST_CONTINUE; } +static void remove_private(xmlNodePtr node) +{ + xmlNodePtr child; + + for (child = node->children; child; child = child->next) + remove_private(child); + + if ((node->type == XML_ELEMENT_NODE || + node->type == XML_XINCLUDE_START || + node->type == XML_XINCLUDE_END) && + node->properties) { + for (child = (xmlNodePtr)node->properties; child; child = child->next) + remove_private(child); + } + + node->_private = NULL; +} + static void dealloc(xmlDocPtr doc) { - xmlDeregisterNodeFunc func; st_table *node_hash; NOKOGIRI_DEBUG_START(doc); - func = xmlDeregisterNodeDefault(NULL); node_hash = DOC_UNLINKED_NODE_HASH(doc); @@ -31,10 +47,17 @@ static void dealloc(xmlDocPtr doc) st_free_table(node_hash); free(doc->_private); - doc->_private = NULL; + + /* When both Nokogiri and libxml-ruby are loaded, make sure that all nodes + * have their _private pointers cleared. This is to avoid libxml-ruby's + * xmlDeregisterNode callback from accessing VALUE pointers from ruby's GC + * free context, which can result in segfaults. + */ + if (xmlDeregisterNodeDefaultValue) + remove_private((xmlNodePtr)doc); + xmlFreeDoc(doc); - xmlDeregisterNodeDefault(func); NOKOGIRI_DEBUG_END(doc); }