diff --git a/ext/java/nokogiri/HtmlElementDescription.java b/ext/java/nokogiri/HtmlElementDescription.java index 43851fe2c44..048663e8b44 100644 --- a/ext/java/nokogiri/HtmlElementDescription.java +++ b/ext/java/nokogiri/HtmlElementDescription.java @@ -105,10 +105,11 @@ protected static List findSubElements(HTMLElements.Element elem) { public static IRubyObject get(ThreadContext context, IRubyObject klazz, IRubyObject name) { - HTMLElements.Element elem = HTMLElements.getElement(name.toString()); + HTMLElements.Element elem = HTMLElements.getElement(name.asJavaString(), HTMLElements.NO_SUCH_ELEMENT); if (elem == HTMLElements.NO_SUCH_ELEMENT) return context.getRuntime().getNil(); + elem = HTMLElements.getElement(name.toString()); HtmlElementDescription desc = new HtmlElementDescription(context.getRuntime(), (RubyClass)klazz); desc.element = elem; diff --git a/ext/java/nokogiri/NokogiriService.java b/ext/java/nokogiri/NokogiriService.java index 5f1b19240de..954bfdbbb55 100644 --- a/ext/java/nokogiri/NokogiriService.java +++ b/ext/java/nokogiri/NokogiriService.java @@ -36,6 +36,7 @@ import java.util.HashMap; import java.util.Map; +import org.cyberneko.html.HTMLElements; import org.jruby.Ruby; import org.jruby.RubyArray; import org.jruby.RubyClass; @@ -53,6 +54,24 @@ * @author Yoko Harada */ public class NokogiriService implements BasicLibraryService { + + // Since version 1.9.13 nekohtml auto-completes a tbody around the + // tr tags of a table, see http://sourceforge.net/p/nekohtml/code/241/ + // This monkey patch swaps in a replacement TR entry to undo that auto-completion. + static class MonkeyPatchHTMLElements extends HTMLElements { + static void patchIt() { + Element[] array = ELEMENTS_ARRAY['T'-'A']; // presumably the bucket of element names starting with 'T' (TODO confirm) + for(int i = 0; i < array.length; i++) { + if (array[i].name.equals("TR")) { + array[i] = new Element(TR, "TR", Element.BLOCK, TABLE, new short[]{TD,TH,TR,COLGROUP,DIV}); + } + } + } + } + static { + MonkeyPatchHTMLElements.patchIt(); + } + public static final String nokogiriClassCacheGvarName = 
"$NOKOGIRI_CLASS_CACHE"; public boolean basicLoad(Ruby ruby) { diff --git a/ext/java/nokogiri/internals/NokogiriErrorHandler.java b/ext/java/nokogiri/internals/NokogiriErrorHandler.java index 1464b1e1e4a..93d5867983c 100644 --- a/ext/java/nokogiri/internals/NokogiriErrorHandler.java +++ b/ext/java/nokogiri/internals/NokogiriErrorHandler.java @@ -79,8 +79,10 @@ public List getErrorsReadyForRuby(ThreadContext context) { return res; } - protected boolean usesNekoHtml(String domain) { - if ("http://cyberneko.org/html".equals(domain)) return true; - else return false; + protected void add(Exception e){ + // this message might be bound to the nekohtml version 1.9.21 + if (!e.getMessage().equals("No character encoding indicator at beginning of document.")) { + errors.add(e); + } } } diff --git a/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java b/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java index 3ecf85c922a..0a71425cf5d 100644 --- a/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +++ b/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java @@ -115,7 +115,7 @@ public void fatalError(String domain, String key, XMLParseException e) { * @param e Exception. */ public void warning(String domain, String key, XMLParseException e) { - //noop. NekoHtml adds too many warnings. 
+ add(e); /* warnings are now handed to add(), which drops the "No character encoding indicator" message */ } } diff --git a/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java b/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java index 5ecab98d7f9..9c5edb3d52e 100644 --- a/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +++ b/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java @@ -73,7 +73,6 @@ public void fatalError(String domain, String key, XMLParseException e) throws XM public void warning(String domain, String key, XMLParseException e) throws XMLParseException { if (!nowarning) throw e; - if (!usesNekoHtml(domain)) errors.add(e); + else add(e); /* nowarning is set: hand the warning to add() instead of throwing */ } - } diff --git a/lib/nekohtml.jar b/lib/nekohtml.jar index 5b61ccf2185..d46601dcb5e 100644 Binary files a/lib/nekohtml.jar and b/lib/nekohtml.jar differ diff --git a/test/html/test_node_encoding.rb b/test/html/test_node_encoding.rb index e3716664fdc..d22fb3e74ec 100644 --- a/test/html/test_node_encoding.rb +++ b/test/html/test_node_encoding.rb @@ -21,6 +21,25 @@ def test_inner_html contents = doc.at('h2').inner_html assert_match hello, contents end + + def test_encoding_GH_1113 + doc = Nokogiri::HTML::Document.new + hex = '<p>&#x1f340;</p>' # U+1F340 as a hexadecimal character reference + decimal = '<p>&#127808;</p>' # the same code point as a decimal character reference + encoded = '<p>🍀</p>' # the same code point as a literal character + + doc.encoding = 'UTF-8' + expected = encoded + assert_equal expected, doc.fragment(hex).to_s + assert_equal expected, doc.fragment(decimal).to_s + assert_equal expected, doc.fragment(encoded).to_s + + doc.encoding = 'US-ASCII' + expected = defined?(JRUBY_VERSION) ? hex : decimal + assert_equal expected, doc.fragment(hex).to_s + assert_equal expected, doc.fragment(decimal).to_s + assert_equal expected, doc.fragment(encoded).to_s + end end end end