From a5bf3b2affadbec0d27f7bd1581376b70a16828f Mon Sep 17 00:00:00 2001 From: Yoko Harada Date: Tue, 8 Mar 2011 14:44:14 -0500 Subject: [PATCH] This fixes internal/external subsets creation and serialization --- ext/java/nokogiri/NokogiriService.java | 12 ++- ext/java/nokogiri/XmlDocument.java | 34 ++++----- ext/java/nokogiri/XmlDtd.java | 75 ++++++++++--------- ext/java/nokogiri/internals/XmlDomParser.java | 3 +- .../internals/XmlDomParserContext.java | 14 ++++ 5 files changed, 82 insertions(+), 56 deletions(-) diff --git a/ext/java/nokogiri/NokogiriService.java b/ext/java/nokogiri/NokogiriService.java index 5efc1d34d0f..80afa6a857d 100644 --- a/ext/java/nokogiri/NokogiriService.java +++ b/ext/java/nokogiri/NokogiriService.java @@ -347,9 +347,17 @@ public IRubyObject allocate(Ruby runtime, RubyClass klazz) { } }; - private static ObjectAllocator XML_DTD_ALLOCATOR = new ObjectAllocator() { + public static final ObjectAllocator XML_DTD_ALLOCATOR = new ObjectAllocator() { + private XmlDtd xmlDtd = null; public IRubyObject allocate(Ruby runtime, RubyClass klazz) { - return new XmlDtd(runtime, klazz); + if (xmlDtd == null) xmlDtd = new XmlDtd(runtime, klazz); + try { + XmlDtd clone = (XmlDtd)xmlDtd.clone(); + clone.setMetaClass(klazz); + return clone; + } catch (CloneNotSupportedException e) { + return new XmlDtd(runtime, klazz); + } } }; diff --git a/ext/java/nokogiri/XmlDocument.java b/ext/java/nokogiri/XmlDocument.java index 58f19da9ec0..74fa49d9d8f 100644 --- a/ext/java/nokogiri/XmlDocument.java +++ b/ext/java/nokogiri/XmlDocument.java @@ -76,8 +76,8 @@ public class XmlDocument extends XmlNode { /* UserData keys for storing extra info in the document node. */ public final static String DTD_RAW_DOCUMENT = "DTD_RAW_DOCUMENT"; - protected final static String DTD_INTERNAL_SUBSET = "DTD_INTERNAL_SUBSET"; - protected final static String DTD_EXTERNAL_SUBSET = "DTD_EXTERNAL_SUBSET"; + public final static String DTD_INTERNAL_SUBSET = "DTD_INTERNAL_SUBSET"; + public final static String DTD_EXTERNAL_SUBSET = "DTD_EXTERNAL_SUBSET"; /* DocumentBuilderFactory implementation class name. This needs to set a classloader into it. * Setting an appropriate classloader resolves issue 380. @@ -420,16 +420,11 @@ public static IRubyObject substitute_entities_set(ThreadContext context, IRubyOb } public IRubyObject getInternalSubset(ThreadContext context) { - IRubyObject dtd = - (IRubyObject) node.getUserData(DTD_INTERNAL_SUBSET); + IRubyObject dtd = (IRubyObject) node.getUserData(DTD_INTERNAL_SUBSET); if (dtd == null) { - if (getDocument().getDoctype() == null) - dtd = context.getRuntime().getNil(); - else - dtd = XmlDtd.newFromInternalSubset(context.getRuntime(), - getDocument()); - + if (getDocument().getDoctype() == null) dtd = context.getRuntime().getNil(); + else dtd = XmlDtd.newFromInternalSubset(context.getRuntime(), getDocument()); setInternalSubset(dtd); } @@ -456,15 +451,9 @@ protected void setInternalSubset(IRubyObject data) { } public IRubyObject getExternalSubset(ThreadContext context) { - IRubyObject dtd = (IRubyObject) - node.getUserData(DTD_EXTERNAL_SUBSET); - - if (dtd == null) { - dtd = XmlDtd.newFromExternalSubset(context.getRuntime(), - getDocument()); - setExternalSubset(dtd); - } + IRubyObject dtd = (IRubyObject) node.getUserData(DTD_EXTERNAL_SUBSET); + if (dtd == null) return context.getRuntime().getNil(); return dtd; } @@ -518,6 +507,15 @@ public void saveContent(ThreadContext context, SaveContext ctx) { ctx.append("?>\n"); } + IRubyObject subset = getExternalSubset(context);; + if (subset != null && !subset.isNil()) { + ((XmlDtd)subset).saveContent(context, ctx); + } + subset = getInternalSubset(context); + if (subset != null && !subset.isNil()) { + ((XmlDtd)subset).saveContent(context, ctx); + } + IRubyObject maybeRoot = root(context); if (maybeRoot.isNil()) throw context.getRuntime().newRuntimeError("no root document"); diff --git a/ext/java/nokogiri/XmlDtd.java b/ext/java/nokogiri/XmlDtd.java index 264aced2795..32890289f04 100644 --- a/ext/java/nokogiri/XmlDtd.java +++ b/ext/java/nokogiri/XmlDtd.java @@ -98,43 +98,40 @@ public class XmlDtd extends XmlNode { public XmlDtd(Ruby ruby, RubyClass rubyClass) { super(ruby, rubyClass); } + + public void setNode(Ruby runtime, Node dtd) { + this.node = dtd; + notationClass = (RubyClass) runtime.getClassFromPath("Nokogiri::XML::Notation"); - public XmlDtd(Ruby ruby) { - this(ruby, getNokogiriClass(ruby, "Nokogiri::XML::DTD"), null); - } - - public XmlDtd(Ruby ruby, Node dtd) { - this(ruby, getNokogiriClass(ruby, "Nokogiri::XML::DTD"), dtd); - } - - public XmlDtd(Ruby ruby, RubyClass rubyClass, Node dtd) { - super(ruby, rubyClass, dtd); - notationClass = (RubyClass) - ruby.getClassFromPath("Nokogiri::XML::Notation"); - - name = pubId = sysId = ruby.getNil(); + name = pubId = sysId = runtime.getNil(); if (dtd == null) return; // This is the dtd declaration stored in the document; it // contains the DTD name (root element) and public and system - // ids. The actual declarations are in the NekoDTD 'dtd' + // ids. The actual declarations are in the NekoDTD 'dtd' // variable. I don't know of a way to consolidate the two. DocumentType otherDtd = dtd.getOwnerDocument().getDoctype(); if (otherDtd != null) { - name = stringOrNil(ruby, otherDtd.getNodeName()); - pubId = nonEmptyStringOrNil(ruby, otherDtd.getPublicId()); - sysId = nonEmptyStringOrNil(ruby, otherDtd.getSystemId()); + name = stringOrNil(runtime, otherDtd.getNodeName()); + pubId = nonEmptyStringOrNil(runtime, otherDtd.getPublicId()); + sysId = nonEmptyStringOrNil(runtime, otherDtd.getSystemId()); } } - public static XmlDtd newEmpty(Ruby ruby, + public XmlDtd(Ruby ruby, RubyClass rubyClass, Node dtd) { + super(ruby, rubyClass, dtd); + setNode(ruby, dtd); + } + + public static XmlDtd newEmpty(Ruby runtime, Document doc, IRubyObject name, IRubyObject external_id, IRubyObject system_id) { Element placeHolder = doc.createElement("dtd_placeholder"); - XmlDtd dtd = new XmlDtd(ruby, placeHolder); + XmlDtd dtd = (XmlDtd) NokogiriService.XML_DTD_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::DTD")); + dtd.setNode(runtime, placeHolder); dtd.name = name; dtd.pubId = external_id; dtd.sysId = system_id; @@ -155,38 +152,47 @@ public static XmlDtd newEmpty(Ruby ruby, * Document provided by NekoDTD. * */ - public static XmlDtd newFromInternalSubset(Ruby ruby, Document doc) { + public static XmlDtd newFromInternalSubset(Ruby runtime, Document doc) { Object dtdTree_ = doc.getUserData(XmlDocument.DTD_RAW_DOCUMENT); - if (dtdTree_ == null) - return new XmlDtd(ruby); + if (dtdTree_ == null) { + XmlDtd xmlDtd = (XmlDtd) NokogiriService.XML_DTD_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::DTD")); + xmlDtd.setNode(runtime, null); + return xmlDtd; + } Node dtdTree = (Node) dtdTree_; Node dtd = getInternalSubset(dtdTree); if (dtd == null) { - return new XmlDtd(ruby); + XmlDtd xmlDtd = (XmlDtd) NokogiriService.XML_DTD_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::DTD")); + xmlDtd.setNode(runtime, null); + return xmlDtd; } else { // Import the node into doc so it has the correct owner document. dtd = doc.importNode(dtd, true); - return new XmlDtd(ruby, dtd); + XmlDtd xmlDtd = (XmlDtd) NokogiriService.XML_DTD_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::DTD")); + xmlDtd.setNode(runtime, dtd); + return xmlDtd; } } - public static IRubyObject newFromExternalSubset(Ruby ruby, Document doc) { + public static IRubyObject newFromExternalSubset(Ruby runtime, Document doc) { Object dtdTree_ = doc.getUserData(XmlDocument.DTD_RAW_DOCUMENT); if (dtdTree_ == null) { - return ruby.getNil(); + return runtime.getNil(); } Node dtdTree = (Node) dtdTree_; Node dtd = getExternalSubset(dtdTree); if (dtd == null) { - return ruby.getNil(); + return runtime.getNil(); } else if (!dtd.hasChildNodes()) { - return ruby.getNil(); + return runtime.getNil(); } else { // Import the node into doc so it has the correct owner document. dtd = doc.importNode(dtd, true); - return new XmlDtd(ruby, dtd); + XmlDtd xmlDtd = (XmlDtd) NokogiriService.XML_DTD_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::DTD")); + xmlDtd.setNode(runtime, dtd); + return xmlDtd; } } @@ -457,10 +463,11 @@ protected void extractDecls(ThreadContext context, Node node) { public void saveContent(ThreadContext context, SaveContext ctx) { ctx.append(""); - } else if (sysId != null) { - ctx.append("SYSTEM " + sysId); + // either one of pubId or sysId exists in a single XmlDtd + if (pubId != null && !pubId.isNil()) { + ctx.append("PUBLIC \"" + pubId + "\">\n"); + } else if (sysId != null && !sysId.isNil()) { + ctx.append("SYSTEM \"" + sysId + "\">\n"); } } diff --git a/ext/java/nokogiri/internals/XmlDomParser.java b/ext/java/nokogiri/internals/XmlDomParser.java index e3e1d3d4685..e6b711cf273 100644 --- a/ext/java/nokogiri/internals/XmlDomParser.java +++ b/ext/java/nokogiri/internals/XmlDomParser.java @@ -71,7 +71,6 @@ public void parse(InputSource source) throws SAXException, IOException { if (doc == null) throw new RuntimeException("null document"); - doc.setUserData(XmlDocument.DTD_RAW_DOCUMENT, dtd.getDocument(), - null); + doc.setUserData(XmlDocument.DTD_RAW_DOCUMENT, dtd.getDocument(), null); } } diff --git a/ext/java/nokogiri/internals/XmlDomParserContext.java b/ext/java/nokogiri/internals/XmlDomParserContext.java index 7208fbe7601..0ca2a0807f5 100644 --- a/ext/java/nokogiri/internals/XmlDomParserContext.java +++ b/ext/java/nokogiri/internals/XmlDomParserContext.java @@ -42,9 +42,11 @@ import nokogiri.NokogiriService; import nokogiri.XmlDocument; +import nokogiri.XmlDtd; import nokogiri.XmlSyntaxError; import org.apache.xerces.parsers.DOMParser; +import org.apache.xerces.xni.XMLDTDHandler; import org.jruby.Ruby; import org.jruby.RubyArray; import org.jruby.RubyClass; @@ -52,6 +54,7 @@ import org.jruby.runtime.ThreadContext; import org.jruby.runtime.builtin.IRubyObject; import org.w3c.dom.Document; +import org.w3c.dom.DocumentType; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.xml.sax.EntityResolver; @@ -189,6 +192,17 @@ protected XmlDocument wrapDocument(ThreadContext context, XmlDocument xmlDocument = (XmlDocument) NokogiriService.XML_DOCUMENT_ALLOCATOR.allocate(context.getRuntime(), klazz); xmlDocument.setNode(context, doc); xmlDocument.setEncoding(ruby_encoding); + + if (options.dtdLoad) { + XmlDtd xmlDtd = (XmlDtd) XmlDtd.newFromExternalSubset(context.getRuntime(), doc); + doc.setUserData(XmlDocument.DTD_EXTERNAL_SUBSET, xmlDtd, null); + } + + if (doc.getDoctype() != null) { + XmlDtd xmlDtd = XmlDtd.newFromInternalSubset(context.getRuntime(), doc); + doc.setUserData(XmlDocument.DTD_INTERNAL_SUBSET, xmlDtd, null); + } + return xmlDocument; }