diff --git a/ext/nokogiri/html4_document.c b/ext/nokogiri/html4_document.c
index 939321e3bda..7b816d5ff04 100644
--- a/ext/nokogiri/html4_document.c
+++ b/ext/nokogiri/html4_document.c
@@ -145,7 +145,7 @@ static VALUE
rb_html_document_type(VALUE self)
{
htmlDocPtr doc;
- Data_Get_Struct(self, xmlDoc, doc);
+ TypedData_Get_Struct(self, xmlDoc, &noko_xml_document_data_type, doc);
return INT2NUM(doc->type);
}
diff --git a/ext/nokogiri/nokogiri.h b/ext/nokogiri/nokogiri.h
index 4c6bea855e9..bc7f515f992 100644
--- a/ext/nokogiri/nokogiri.h
+++ b/ext/nokogiri/nokogiri.h
@@ -166,6 +166,8 @@ typedef struct _nokogiriXsltStylesheetTuple {
VALUE func_instances;
} nokogiriXsltStylesheetTuple;
+extern const rb_data_type_t noko_xml_document_data_type;
+
void noko_xml_document_pin_node(xmlNodePtr);
void noko_xml_document_pin_namespace(xmlNsPtr, xmlDocPtr);
diff --git a/ext/nokogiri/xml_comment.c b/ext/nokogiri/xml_comment.c
index 81b57ca472c..35db13d098f 100644
--- a/ext/nokogiri/xml_comment.c
+++ b/ext/nokogiri/xml_comment.c
@@ -30,7 +30,7 @@ new (int argc, VALUE *argv, VALUE klass)
rb_raise(rb_eArgError, "first argument must be a XML::Document or XML::Node");
}
- Data_Get_Struct(document, xmlDoc, xml_doc);
+ TypedData_Get_Struct(document, xmlDoc, &noko_xml_document_data_type, xml_doc);
node = xmlNewDocComment(
xml_doc,
diff --git a/ext/nokogiri/xml_document.c b/ext/nokogiri/xml_document.c
index a55daa1b55e..8dff604bec7 100644
--- a/ext/nokogiri/xml_document.c
+++ b/ext/nokogiri/xml_document.c
@@ -51,8 +51,9 @@ remove_private(xmlNodePtr node)
}
static void
-mark(xmlDocPtr doc)
+mark(void *data)
{
+ xmlDocPtr doc = (xmlDocPtr)data;
nokogiriTuplePtr tuple = (nokogiriTuplePtr)doc->_private;
if (tuple) {
rb_gc_mark(tuple->doc);
@@ -61,8 +62,9 @@ mark(xmlDocPtr doc)
}
static void
-dealloc(xmlDocPtr doc)
+dealloc(void *data)
{
+ xmlDocPtr doc = (xmlDocPtr)data;
st_table *node_hash;
node_hash = DOC_UNLINKED_NODE_HASH(doc);
@@ -84,6 +86,45 @@ dealloc(xmlDocPtr doc)
xmlFreeDoc(doc);
}
+static size_t
+memsize_node(const xmlNodePtr node)
+{
+ /* note we don't count namespace definitions, just going for a good-enough number here */
+ xmlNodePtr child;
+ size_t memsize = 0;
+
+ memsize += xmlStrlen(node->name);
+ for (child = (xmlNodePtr)node->properties; child; child = child->next) {
+ memsize += sizeof(xmlAttr) + memsize_node(child);
+ }
+ if (node->type == XML_TEXT_NODE) {
+ memsize += xmlStrlen(node->content);
+ }
+ for (child = node->children; child; child = child->next) {
+ memsize += sizeof(xmlNode) + memsize_node(child);
+ }
+ return memsize;
+}
+
+static size_t
+memsize(const void *data)
+{
+ xmlDocPtr doc = (const xmlDocPtr)data;
+ size_t memsize = sizeof(xmlDoc);
+ /* This may not account for all memory use */
+ memsize += memsize_node((xmlNodePtr)doc);
+ return memsize;
+}
+
+const rb_data_type_t noko_xml_document_data_type = {
+ .wrap_struct_name = "Nokogiri::XML::Document",
+ .function = {
+ .dmark = mark,
+ .dfree = dealloc,
+ .dsize = memsize,
+ }
+};
+
static void
recursively_remove_namespaces_from_node(xmlNodePtr node)
{
@@ -127,7 +168,7 @@ static VALUE
url(VALUE self)
{
xmlDocPtr doc;
- Data_Get_Struct(self, xmlDoc, doc);
+ TypedData_Get_Struct(self, xmlDoc, &noko_xml_document_data_type, doc);
if (doc->URL) { return NOKOGIRI_STR_NEW2(doc->URL); }
@@ -146,7 +187,7 @@ rb_xml_document_root_set(VALUE self, VALUE rb_new_root)
xmlDocPtr c_document;
xmlNodePtr c_new_root = NULL, c_current_root;
- Data_Get_Struct(self, xmlDoc, c_document);
+ TypedData_Get_Struct(self, xmlDoc, &noko_xml_document_data_type, c_document);
c_current_root = xmlDocGetRootElement(c_document);
if (c_current_root) {
@@ -190,7 +231,7 @@ rb_xml_document_root(VALUE self)
xmlDocPtr c_document;
xmlNodePtr c_root;
- Data_Get_Struct(self, xmlDoc, c_document);
+ TypedData_Get_Struct(self, xmlDoc, &noko_xml_document_data_type, c_document);
c_root = xmlDocGetRootElement(c_document);
if (!c_root) {
@@ -210,7 +251,7 @@ static VALUE
set_encoding(VALUE self, VALUE encoding)
{
xmlDocPtr doc;
- Data_Get_Struct(self, xmlDoc, doc);
+ TypedData_Get_Struct(self, xmlDoc, &noko_xml_document_data_type, doc);
if (doc->encoding) {
xmlFree(DISCARD_CONST_QUAL_XMLCHAR(doc->encoding));
@@ -231,7 +272,7 @@ static VALUE
encoding(VALUE self)
{
xmlDocPtr doc;
- Data_Get_Struct(self, xmlDoc, doc);
+ TypedData_Get_Struct(self, xmlDoc, &noko_xml_document_data_type, doc);
if (!doc->encoding) { return Qnil; }
return NOKOGIRI_STR_NEW2(doc->encoding);
@@ -247,7 +288,7 @@ static VALUE
version(VALUE self)
{
xmlDocPtr doc;
- Data_Get_Struct(self, xmlDoc, doc);
+ TypedData_Get_Struct(self, xmlDoc, &noko_xml_document_data_type, doc);
if (!doc->version) { return Qnil; }
return NOKOGIRI_STR_NEW2(doc->version);
@@ -369,7 +410,7 @@ duplicate_document(int argc, VALUE *argv, VALUE self)
level = INT2NUM((long)1);
}
- Data_Get_Struct(self, xmlDoc, doc);
+ TypedData_Get_Struct(self, xmlDoc, &noko_xml_document_data_type, doc);
dup = xmlCopyDoc(doc, (int)NUM2INT(level));
@@ -443,7 +484,7 @@ static VALUE
remove_namespaces_bang(VALUE self)
{
xmlDocPtr doc ;
- Data_Get_Struct(self, xmlDoc, doc);
+ TypedData_Get_Struct(self, xmlDoc, &noko_xml_document_data_type, doc);
recursively_remove_namespaces_from_node((xmlNodePtr)doc);
return self;
@@ -471,7 +512,7 @@ create_entity(int argc, VALUE *argv, VALUE self)
xmlEntityPtr ptr;
xmlDocPtr doc ;
- Data_Get_Struct(self, xmlDoc, doc);
+ TypedData_Get_Struct(self, xmlDoc, &noko_xml_document_data_type, doc);
rb_scan_args(argc, argv, "14", &name, &type, &external_id, &system_id,
&content);
@@ -559,7 +600,7 @@ rb_xml_document_canonicalize(int argc, VALUE *argv, VALUE self)
}
}
- Data_Get_Struct(self, xmlDoc, c_doc);
+ TypedData_Get_Struct(self, xmlDoc, &noko_xml_document_data_type, c_doc);
rb_cStringIO = rb_const_get_at(rb_cObject, rb_intern("StringIO"));
rb_io = rb_class_new_instance(0, 0, rb_cStringIO);
@@ -607,7 +648,7 @@ noko_xml_document_wrap_with_init_args(VALUE klass, xmlDocPtr c_document, int arg
klass = cNokogiriXmlDocument;
}
- rb_document = Data_Wrap_Struct(klass, mark, dealloc, c_document);
+ rb_document = TypedData_Wrap_Struct(klass, &noko_xml_document_data_type, c_document);
tuple = (nokogiriTuplePtr)ruby_xmalloc(sizeof(nokogiriTuple));
tuple->doc = rb_document;
diff --git a/ext/nokogiri/xml_document_fragment.c b/ext/nokogiri/xml_document_fragment.c
index 1a82eed1ec0..bd534c3df1c 100644
--- a/ext/nokogiri/xml_document_fragment.c
+++ b/ext/nokogiri/xml_document_fragment.c
@@ -19,7 +19,7 @@ new (int argc, VALUE *argv, VALUE klass)
rb_scan_args(argc, argv, "1*", &document, &rest);
- Data_Get_Struct(document, xmlDoc, xml_doc);
+ TypedData_Get_Struct(document, xmlDoc, &noko_xml_document_data_type, xml_doc);
node = xmlNewDocFragment(xml_doc->doc);
diff --git a/ext/nokogiri/xml_entity_reference.c b/ext/nokogiri/xml_entity_reference.c
index d8f12438c5a..272ea9ffec0 100644
--- a/ext/nokogiri/xml_entity_reference.c
+++ b/ext/nokogiri/xml_entity_reference.c
@@ -20,7 +20,7 @@ new (int argc, VALUE *argv, VALUE klass)
rb_scan_args(argc, argv, "2*", &document, &name, &rest);
- Data_Get_Struct(document, xmlDoc, xml_doc);
+ TypedData_Get_Struct(document, xmlDoc, &noko_xml_document_data_type, xml_doc);
node = xmlNewReference(
xml_doc,
diff --git a/ext/nokogiri/xml_node.c b/ext/nokogiri/xml_node.c
index 7ddc639cbe0..204df446fe4 100644
--- a/ext/nokogiri/xml_node.c
+++ b/ext/nokogiri/xml_node.c
@@ -984,7 +984,7 @@ duplicate_node(int argc, VALUE *argv, VALUE self)
if (n_args < 2) {
new_parent_doc = node->doc;
} else {
- Data_Get_Struct(r_new_parent_doc, xmlDoc, new_parent_doc);
+ TypedData_Get_Struct(r_new_parent_doc, xmlDoc, &noko_xml_document_data_type, new_parent_doc);
}
dup = xmlDocCopyNode(node, new_parent_doc, level);
diff --git a/ext/nokogiri/xml_processing_instruction.c b/ext/nokogiri/xml_processing_instruction.c
index f4a6dc2eb28..76358cc0cc9 100644
--- a/ext/nokogiri/xml_processing_instruction.c
+++ b/ext/nokogiri/xml_processing_instruction.c
@@ -22,7 +22,7 @@ new (int argc, VALUE *argv, VALUE klass)
rb_scan_args(argc, argv, "3*", &document, &name, &content, &rest);
- Data_Get_Struct(document, xmlDoc, xml_doc);
+ TypedData_Get_Struct(document, xmlDoc, &noko_xml_document_data_type, xml_doc);
node = xmlNewDocPI(
xml_doc,
diff --git a/ext/nokogiri/xml_relax_ng.c b/ext/nokogiri/xml_relax_ng.c
index 943b0a7cd52..4533abd8838 100644
--- a/ext/nokogiri/xml_relax_ng.c
+++ b/ext/nokogiri/xml_relax_ng.c
@@ -23,7 +23,7 @@ validate_document(VALUE self, VALUE document)
xmlRelaxNGValidCtxtPtr valid_ctxt;
Data_Get_Struct(self, xmlRelaxNG, schema);
- Data_Get_Struct(document, xmlDoc, doc);
+ TypedData_Get_Struct(document, xmlDoc, &noko_xml_document_data_type, doc);
errors = rb_ary_new();
@@ -127,7 +127,7 @@ from_document(int argc, VALUE *argv, VALUE klass)
scanned_args = rb_scan_args(argc, argv, "11", &document, &parse_options);
- Data_Get_Struct(document, xmlDoc, doc);
+ TypedData_Get_Struct(document, xmlDoc, &noko_xml_document_data_type, doc);
doc = doc->doc; /* In case someone passes us a node. ugh. */
if (scanned_args == 1) {
diff --git a/ext/nokogiri/xslt_stylesheet.c b/ext/nokogiri/xslt_stylesheet.c
index 700c754ee3b..720adda8632 100644
--- a/ext/nokogiri/xslt_stylesheet.c
+++ b/ext/nokogiri/xslt_stylesheet.c
@@ -79,7 +79,7 @@ parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
xmlDocPtr xml, xml_cpy;
VALUE errstr, exception;
xsltStylesheetPtr ss ;
- Data_Get_Struct(xmldocobj, xmlDoc, xml);
+ TypedData_Get_Struct(xmldocobj, xmlDoc, &noko_xml_document_data_type, xml);
errstr = rb_str_new(0, 0);
xsltSetGenericErrorFunc((void *)errstr, xslt_generic_error_handler);
@@ -114,7 +114,12 @@ rb_xslt_stylesheet_serialize(VALUE self, VALUE xmlobj)
int doc_len ;
VALUE rval ;
- Data_Get_Struct(xmlobj, xmlDoc, xml);
+ TypedData_Get_Struct(
+ xmlobj,
+ xmlDoc,
+ &noko_xml_document_data_type,
+ xml
+ );
TypedData_Get_Struct(
self,
nokogiriXsltStylesheetTuple,
@@ -265,7 +270,7 @@ rb_xslt_stylesheet_transform(int argc, VALUE *argv, VALUE self)
Check_Type(paramobj, T_ARRAY);
- Data_Get_Struct(xmldoc, xmlDoc, xml);
+ TypedData_Get_Struct(xmldoc, xmlDoc, &noko_xml_document_data_type, xml);
TypedData_Get_Struct(self, nokogiriXsltStylesheetTuple, &xslt_stylesheet_type, wrapper);
param_len = RARRAY_LEN(paramobj);
diff --git a/test/test_memory_leak.rb b/test/test_memory_leak.rb
index 808040d25a9..3b8dc32d1e2 100644
--- a/test/test_memory_leak.rb
+++ b/test/test_memory_leak.rb
@@ -272,6 +272,47 @@ def test_leaking_dtd_nodes_after_internal_subset_removal
end
end # if NOKOGIRI_GC
+ def test_object_space_memsize_of
+ require "objspace"
+ skip("memsize_of not defined") unless ObjectSpace.respond_to?(:memsize_of)
+
+ base_size = ObjectSpace.memsize_of(Nokogiri::XML(<<~XML))
+
+ asdf
+
+ XML
+
+ more_children_size = ObjectSpace.memsize_of(Nokogiri::XML(<<~XML))
+
+ asdf
+ asdf
+ asdf
+
+ XML
+ assert(more_children_size > base_size, "adding children should increase memsize")
+
+ attributes_size = ObjectSpace.memsize_of(Nokogiri::XML(<<~XML))
+
+ asdf
+
+ XML
+ assert(attributes_size > base_size, "adding attributes should increase memsize")
+
+ string_size = ObjectSpace.memsize_of(Nokogiri::XML(<<~XML))
+
+ asdfasdfasdfasdfasdfasdfasdfasdfasdfasdfasdfasdfasdf
+
+ XML
+ assert(string_size > base_size, "longer strings should increase memsize")
+
+ bigger_name_size = ObjectSpace.memsize_of(Nokogiri::XML(<<~XML))
+
+ asdf
+
+ XML
+ assert(bigger_name_size > base_size, "longer tags should increase memsize")
+ end
+
module MemInfo
# from https://stackoverflow.com/questions/7220896/get-current-ruby-process-memory-usage
# this is only going to work on linux