diff --git a/ext/nokogiri/html4_document.c b/ext/nokogiri/html4_document.c
index 939321e3bda..7b816d5ff04 100644
--- a/ext/nokogiri/html4_document.c
+++ b/ext/nokogiri/html4_document.c
@@ -145,7 +145,7 @@ static VALUE
 rb_html_document_type(VALUE self)
 {
   htmlDocPtr doc;
-  Data_Get_Struct(self, xmlDoc, doc);
+  TypedData_Get_Struct(self, xmlDoc, &noko_xml_document_data_type, doc);
   return INT2NUM(doc->type);
 }
 
diff --git a/ext/nokogiri/nokogiri.h b/ext/nokogiri/nokogiri.h
index 4c6bea855e9..bc7f515f992 100644
--- a/ext/nokogiri/nokogiri.h
+++ b/ext/nokogiri/nokogiri.h
@@ -166,6 +166,8 @@ typedef struct _nokogiriXsltStylesheetTuple {
   VALUE func_instances;
 } nokogiriXsltStylesheetTuple;
 
+extern const rb_data_type_t noko_xml_document_data_type;
+
 void noko_xml_document_pin_node(xmlNodePtr);
 void noko_xml_document_pin_namespace(xmlNsPtr, xmlDocPtr);
 
diff --git a/ext/nokogiri/xml_comment.c b/ext/nokogiri/xml_comment.c
index 81b57ca472c..35db13d098f 100644
--- a/ext/nokogiri/xml_comment.c
+++ b/ext/nokogiri/xml_comment.c
@@ -30,7 +30,7 @@ new (int argc, VALUE *argv, VALUE klass)
     rb_raise(rb_eArgError, "first argument must be a XML::Document or XML::Node");
   }
 
-  Data_Get_Struct(document, xmlDoc, xml_doc);
+  TypedData_Get_Struct(document, xmlDoc, &noko_xml_document_data_type, xml_doc);
 
   node = xmlNewDocComment(
            xml_doc,
diff --git a/ext/nokogiri/xml_document.c b/ext/nokogiri/xml_document.c
index a55daa1b55e..8dff604bec7 100644
--- a/ext/nokogiri/xml_document.c
+++ b/ext/nokogiri/xml_document.c
@@ -51,8 +51,9 @@ remove_private(xmlNodePtr node)
 }
 
 static void
-mark(xmlDocPtr doc)
+mark(void *data)
 {
+  xmlDocPtr doc = (xmlDocPtr)data;
   nokogiriTuplePtr tuple = (nokogiriTuplePtr)doc->_private;
   if (tuple) {
     rb_gc_mark(tuple->doc);
@@ -61,8 +62,9 @@ mark(xmlDocPtr doc)
 }
 
 static void
-dealloc(xmlDocPtr doc)
+dealloc(void *data)
 {
+  xmlDocPtr doc = (xmlDocPtr)data;
   st_table *node_hash;
 
   node_hash  = DOC_UNLINKED_NODE_HASH(doc);
@@ -84,6 +86,45 @@ dealloc(xmlDocPtr doc)
   xmlFreeDoc(doc);
 }
 
+static size_t
+memsize_node(const xmlNodePtr node)
+{
+  /* note we don't count namespace definitions, just going for a good-enough number here */
+  xmlNodePtr child;
+  size_t memsize = 0;
+
+  memsize += xmlStrlen(node->name);
+  for (child = (xmlNodePtr)node->properties; child; child = child->next) {
+    memsize += sizeof(xmlAttr) + memsize_node(child);
+  }
+  if (node->type == XML_TEXT_NODE) {
+    memsize += xmlStrlen(node->content);
+  }
+  for (child = node->children; child; child = child->next) {
+    memsize += sizeof(xmlNode) + memsize_node(child);
+  }
+  return memsize;
+}
+
+static size_t
+memsize(const void *data)
+{
+  xmlDocPtr doc = (const xmlDocPtr)data;
+  size_t memsize = sizeof(xmlDoc);
+  /* This may not account for all memory use */
+  memsize += memsize_node((xmlNodePtr)doc);
+  return memsize;
+}
+
+const rb_data_type_t noko_xml_document_data_type = {
+  .wrap_struct_name = "Nokogiri::XML::Document",
+  .function = {
+    .dmark = mark,
+    .dfree = dealloc,
+    .dsize = memsize,
+  }
+};
+
 static void
 recursively_remove_namespaces_from_node(xmlNodePtr node)
 {
@@ -127,7 +168,7 @@ static VALUE
 url(VALUE self)
 {
   xmlDocPtr doc;
-  Data_Get_Struct(self, xmlDoc, doc);
+  TypedData_Get_Struct(self, xmlDoc, &noko_xml_document_data_type, doc);
 
   if (doc->URL) { return NOKOGIRI_STR_NEW2(doc->URL); }
 
@@ -146,7 +187,7 @@ rb_xml_document_root_set(VALUE self, VALUE rb_new_root)
   xmlDocPtr c_document;
   xmlNodePtr c_new_root = NULL, c_current_root;
 
-  Data_Get_Struct(self, xmlDoc, c_document);
+  TypedData_Get_Struct(self, xmlDoc, &noko_xml_document_data_type, c_document);
 
   c_current_root = xmlDocGetRootElement(c_document);
   if (c_current_root) {
@@ -190,7 +231,7 @@ rb_xml_document_root(VALUE self)
   xmlDocPtr c_document;
   xmlNodePtr c_root;
 
-  Data_Get_Struct(self, xmlDoc, c_document);
+  TypedData_Get_Struct(self, xmlDoc, &noko_xml_document_data_type, c_document);
 
   c_root = xmlDocGetRootElement(c_document);
   if (!c_root) {
@@ -210,7 +251,7 @@ static VALUE
 set_encoding(VALUE self, VALUE encoding)
 {
   xmlDocPtr doc;
-  Data_Get_Struct(self, xmlDoc, doc);
+  TypedData_Get_Struct(self, xmlDoc, &noko_xml_document_data_type, doc);
 
   if (doc->encoding) {
     xmlFree(DISCARD_CONST_QUAL_XMLCHAR(doc->encoding));
@@ -231,7 +272,7 @@ static VALUE
 encoding(VALUE self)
 {
   xmlDocPtr doc;
-  Data_Get_Struct(self, xmlDoc, doc);
+  TypedData_Get_Struct(self, xmlDoc, &noko_xml_document_data_type, doc);
 
   if (!doc->encoding) { return Qnil; }
   return NOKOGIRI_STR_NEW2(doc->encoding);
@@ -247,7 +288,7 @@ static VALUE
 version(VALUE self)
 {
   xmlDocPtr doc;
-  Data_Get_Struct(self, xmlDoc, doc);
+  TypedData_Get_Struct(self, xmlDoc, &noko_xml_document_data_type, doc);
 
   if (!doc->version) { return Qnil; }
   return NOKOGIRI_STR_NEW2(doc->version);
@@ -369,7 +410,7 @@ duplicate_document(int argc, VALUE *argv, VALUE self)
     level = INT2NUM((long)1);
   }
 
-  Data_Get_Struct(self, xmlDoc, doc);
+  TypedData_Get_Struct(self, xmlDoc, &noko_xml_document_data_type, doc);
 
   dup = xmlCopyDoc(doc, (int)NUM2INT(level));
 
@@ -443,7 +484,7 @@ static VALUE
 remove_namespaces_bang(VALUE self)
 {
   xmlDocPtr doc ;
-  Data_Get_Struct(self, xmlDoc, doc);
+  TypedData_Get_Struct(self, xmlDoc, &noko_xml_document_data_type, doc);
 
   recursively_remove_namespaces_from_node((xmlNodePtr)doc);
   return self;
@@ -471,7 +512,7 @@ create_entity(int argc, VALUE *argv, VALUE self)
   xmlEntityPtr ptr;
   xmlDocPtr doc ;
 
-  Data_Get_Struct(self, xmlDoc, doc);
+  TypedData_Get_Struct(self, xmlDoc, &noko_xml_document_data_type, doc);
 
   rb_scan_args(argc, argv, "14", &name, &type, &external_id, &system_id,
                &content);
@@ -559,7 +600,7 @@ rb_xml_document_canonicalize(int argc, VALUE *argv, VALUE self)
     }
   }
 
-  Data_Get_Struct(self, xmlDoc, c_doc);
+  TypedData_Get_Struct(self, xmlDoc, &noko_xml_document_data_type, c_doc);
 
   rb_cStringIO = rb_const_get_at(rb_cObject, rb_intern("StringIO"));
   rb_io = rb_class_new_instance(0, 0, rb_cStringIO);
@@ -607,7 +648,7 @@ noko_xml_document_wrap_with_init_args(VALUE klass, xmlDocPtr c_document, int arg
     klass = cNokogiriXmlDocument;
   }
 
-  rb_document = Data_Wrap_Struct(klass, mark, dealloc, c_document);
+  rb_document = TypedData_Wrap_Struct(klass, &noko_xml_document_data_type, c_document);
 
   tuple = (nokogiriTuplePtr)ruby_xmalloc(sizeof(nokogiriTuple));
   tuple->doc = rb_document;
diff --git a/ext/nokogiri/xml_document_fragment.c b/ext/nokogiri/xml_document_fragment.c
index 1a82eed1ec0..bd534c3df1c 100644
--- a/ext/nokogiri/xml_document_fragment.c
+++ b/ext/nokogiri/xml_document_fragment.c
@@ -19,7 +19,7 @@ new (int argc, VALUE *argv, VALUE klass)
 
   rb_scan_args(argc, argv, "1*", &document, &rest);
 
-  Data_Get_Struct(document, xmlDoc, xml_doc);
+  TypedData_Get_Struct(document, xmlDoc, &noko_xml_document_data_type, xml_doc);
 
   node = xmlNewDocFragment(xml_doc->doc);
 
diff --git a/ext/nokogiri/xml_entity_reference.c b/ext/nokogiri/xml_entity_reference.c
index d8f12438c5a..272ea9ffec0 100644
--- a/ext/nokogiri/xml_entity_reference.c
+++ b/ext/nokogiri/xml_entity_reference.c
@@ -20,7 +20,7 @@ new (int argc, VALUE *argv, VALUE klass)
 
   rb_scan_args(argc, argv, "2*", &document, &name, &rest);
 
-  Data_Get_Struct(document, xmlDoc, xml_doc);
+  TypedData_Get_Struct(document, xmlDoc, &noko_xml_document_data_type, xml_doc);
 
   node = xmlNewReference(
            xml_doc,
diff --git a/ext/nokogiri/xml_node.c b/ext/nokogiri/xml_node.c
index 7ddc639cbe0..204df446fe4 100644
--- a/ext/nokogiri/xml_node.c
+++ b/ext/nokogiri/xml_node.c
@@ -984,7 +984,7 @@ duplicate_node(int argc, VALUE *argv, VALUE self)
   if (n_args < 2) {
     new_parent_doc = node->doc;
   } else {
-    Data_Get_Struct(r_new_parent_doc, xmlDoc, new_parent_doc);
+    TypedData_Get_Struct(r_new_parent_doc, xmlDoc, &noko_xml_document_data_type, new_parent_doc);
   }
 
   dup = xmlDocCopyNode(node, new_parent_doc, level);
diff --git a/ext/nokogiri/xml_processing_instruction.c b/ext/nokogiri/xml_processing_instruction.c
index f4a6dc2eb28..76358cc0cc9 100644
--- a/ext/nokogiri/xml_processing_instruction.c
+++ b/ext/nokogiri/xml_processing_instruction.c
@@ -22,7 +22,7 @@ new (int argc, VALUE *argv, VALUE klass)
 
   rb_scan_args(argc, argv, "3*", &document, &name, &content, &rest);
 
-  Data_Get_Struct(document, xmlDoc, xml_doc);
+  TypedData_Get_Struct(document, xmlDoc, &noko_xml_document_data_type, xml_doc);
 
   node = xmlNewDocPI(
            xml_doc,
diff --git a/ext/nokogiri/xml_relax_ng.c b/ext/nokogiri/xml_relax_ng.c
index 943b0a7cd52..4533abd8838 100644
--- a/ext/nokogiri/xml_relax_ng.c
+++ b/ext/nokogiri/xml_relax_ng.c
@@ -23,7 +23,7 @@ validate_document(VALUE self, VALUE document)
   xmlRelaxNGValidCtxtPtr valid_ctxt;
 
   Data_Get_Struct(self, xmlRelaxNG, schema);
-  Data_Get_Struct(document, xmlDoc, doc);
+  TypedData_Get_Struct(document, xmlDoc, &noko_xml_document_data_type, doc);
 
   errors = rb_ary_new();
 
@@ -127,7 +127,7 @@ from_document(int argc, VALUE *argv, VALUE klass)
 
   scanned_args = rb_scan_args(argc, argv, "11", &document, &parse_options);
 
-  Data_Get_Struct(document, xmlDoc, doc);
+  TypedData_Get_Struct(document, xmlDoc, &noko_xml_document_data_type, doc);
   doc = doc->doc; /* In case someone passes us a node. ugh. */
 
   if (scanned_args == 1) {
diff --git a/ext/nokogiri/xslt_stylesheet.c b/ext/nokogiri/xslt_stylesheet.c
index 700c754ee3b..720adda8632 100644
--- a/ext/nokogiri/xslt_stylesheet.c
+++ b/ext/nokogiri/xslt_stylesheet.c
@@ -79,7 +79,7 @@ parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
   xmlDocPtr xml, xml_cpy;
   VALUE errstr, exception;
   xsltStylesheetPtr ss ;
-  Data_Get_Struct(xmldocobj, xmlDoc, xml);
+  TypedData_Get_Struct(xmldocobj, xmlDoc, &noko_xml_document_data_type, xml);
 
   errstr = rb_str_new(0, 0);
   xsltSetGenericErrorFunc((void *)errstr, xslt_generic_error_handler);
@@ -114,7 +114,12 @@ rb_xslt_stylesheet_serialize(VALUE self, VALUE xmlobj)
   int doc_len ;
   VALUE rval ;
 
-  Data_Get_Struct(xmlobj, xmlDoc, xml);
+  TypedData_Get_Struct(
+    xmlobj,
+    xmlDoc,
+    &noko_xml_document_data_type,
+    xml
+  );
   TypedData_Get_Struct(
     self,
     nokogiriXsltStylesheetTuple,
@@ -265,7 +270,7 @@ rb_xslt_stylesheet_transform(int argc, VALUE *argv, VALUE self)
 
   Check_Type(paramobj, T_ARRAY);
 
-  Data_Get_Struct(xmldoc, xmlDoc, xml);
+  TypedData_Get_Struct(xmldoc, xmlDoc, &noko_xml_document_data_type, xml);
   TypedData_Get_Struct(self, nokogiriXsltStylesheetTuple, &xslt_stylesheet_type, wrapper);
 
   param_len = RARRAY_LEN(paramobj);
diff --git a/test/test_memory_leak.rb b/test/test_memory_leak.rb
index 808040d25a9..3b8dc32d1e2 100644
--- a/test/test_memory_leak.rb
+++ b/test/test_memory_leak.rb
@@ -272,6 +272,47 @@ def test_leaking_dtd_nodes_after_internal_subset_removal
     end
   end # if NOKOGIRI_GC
 
+  def test_object_space_memsize_of
+    require "objspace"
+    skip("memsize_of not defined") unless ObjectSpace.respond_to?(:memsize_of)
+
+    base_size = ObjectSpace.memsize_of(Nokogiri::XML(<<~XML))
+      <root>
+        <child>asdf</child>
+      </root>
+    XML
+
+    more_children_size = ObjectSpace.memsize_of(Nokogiri::XML(<<~XML))
+      <root>
+        <child>asdf</child>
+        <child>asdf</child>
+        <child>asdf</child>
+      </root>
+    XML
+    assert(more_children_size > base_size, "adding children should increase memsize")
+
+    attributes_size = ObjectSpace.memsize_of(Nokogiri::XML(<<~XML))
+      <root>
+        <child a="b" c="d">asdf</child>
+      </root>
+    XML
+    assert(attributes_size > base_size, "adding attributes should increase memsize")
+
+    string_size = ObjectSpace.memsize_of(Nokogiri::XML(<<~XML))
+      <root>
+        <child>asdfasdfasdfasdfasdfasdfasdfasdfasdfasdfasdfasdfasdf</child>
+      </root>
+    XML
+    assert(string_size > base_size, "longer strings should increase memsize")
+
+    bigger_name_size = ObjectSpace.memsize_of(Nokogiri::XML(<<~XML))
+      <root>
+        <superduperamazingchild>asdf</superduperamazingchild>
+      </root>
+    XML
+    assert(bigger_name_size > base_size, "longer tags should increase memsize")
+  end
+
   module MemInfo
     # from https://stackoverflow.com/questions/7220896/get-current-ruby-process-memory-usage
     # this is only going to work on linux