diff --git a/ext/java/nokogiri/HtmlElementDescription.java b/ext/java/nokogiri/HtmlElementDescription.java
index 43851fe2c44..048663e8b44 100644
--- a/ext/java/nokogiri/HtmlElementDescription.java
+++ b/ext/java/nokogiri/HtmlElementDescription.java
@@ -105,10 +105,11 @@ protected static List findSubElements(HTMLElements.Element elem) {
public static IRubyObject get(ThreadContext context,
IRubyObject klazz, IRubyObject name) {
- HTMLElements.Element elem = HTMLElements.getElement(name.toString());
+ HTMLElements.Element elem = HTMLElements.getElement(name.asJavaString(), HTMLElements.NO_SUCH_ELEMENT);
if (elem == HTMLElements.NO_SUCH_ELEMENT)
return context.getRuntime().getNil();
+ elem = HTMLElements.getElement(name.toString());
HtmlElementDescription desc =
new HtmlElementDescription(context.getRuntime(), (RubyClass)klazz);
desc.element = elem;
diff --git a/ext/java/nokogiri/NokogiriService.java b/ext/java/nokogiri/NokogiriService.java
index 5f1b19240de..954bfdbbb55 100644
--- a/ext/java/nokogiri/NokogiriService.java
+++ b/ext/java/nokogiri/NokogiriService.java
@@ -36,6 +36,7 @@
import java.util.HashMap;
import java.util.Map;
+import org.cyberneko.html.HTMLElements;
import org.jruby.Ruby;
import org.jruby.RubyArray;
import org.jruby.RubyClass;
@@ -53,6 +54,24 @@
* @author Yoko Harada
*/
public class NokogiriService implements BasicLibraryService {
+
+ // Since NekoHTML 1.9.13 the parser auto-completes a tbody element around
+ // the tr tags of a table - http://sourceforge.net/p/nekohtml/code/241/
+ // This monkey patch undoes that auto-completion.
+ static class MonkeyPatchHTMLElements extends HTMLElements {
+ static void patchIt() {
+ Element[] array = ELEMENTS_ARRAY['T'-'A']; // presumably the bucket of element names starting with 'T' - confirm against NekoHTML
+ for(int i = 0; i < array.length; i++) {
+ if (array[i].name.equals("TR")) {
+ array[i] = new Element(TR, "TR", Element.BLOCK, TABLE, new short[]{TD,TH,TR,COLGROUP,DIV}); // overwrite the TR entry in place with this definition
+ }
+ }
+ }
+ }
+ static {
+ MonkeyPatchHTMLElements.patchIt(); // runs once, from NokogiriService's static initializer
+ }
+
public static final String nokogiriClassCacheGvarName = "$NOKOGIRI_CLASS_CACHE";
public boolean basicLoad(Ruby ruby) {
diff --git a/ext/java/nokogiri/internals/NokogiriErrorHandler.java b/ext/java/nokogiri/internals/NokogiriErrorHandler.java
index 1464b1e1e4a..93d5867983c 100644
--- a/ext/java/nokogiri/internals/NokogiriErrorHandler.java
+++ b/ext/java/nokogiri/internals/NokogiriErrorHandler.java
@@ -79,8 +79,10 @@ public List getErrorsReadyForRuby(ThreadContext context) {
return res;
}
- protected boolean usesNekoHtml(String domain) {
- if ("http://cyberneko.org/html".equals(domain)) return true;
- else return false;
+ /**
+  * Records an exception in the error list, except for the one message
+  * that is filtered out below.
+  *
+  * @param e exception to record; its message may be null, in which case
+  *          the exception is recorded
+  */
+ protected void add(Exception e){
+ // this message might be bound to the nekohtml version 1.9.21
+ // Constant-first equals: Throwable.getMessage() may return null, and a
+ // message-less exception must not crash the error handler.
+ if (!"No character encoding indicator at beginning of document.".equals(e.getMessage())) {
+ errors.add(e);
+ }
+ }
}
diff --git a/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java b/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java
index 3ecf85c922a..0a71425cf5d 100644
--- a/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java
+++ b/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java
@@ -115,7 +115,7 @@ public void fatalError(String domain, String key, XMLParseException e) {
* @param e Exception.
*/
public void warning(String domain, String key, XMLParseException e) {
- //noop. NekoHtml adds too many warnings.
+ add(e); // warnings are no longer dropped; they are handed to add()
}
}
diff --git a/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java b/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java
index 5ecab98d7f9..9c5edb3d52e 100644
--- a/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java
+++ b/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java
@@ -73,7 +73,6 @@ public void fatalError(String domain, String key, XMLParseException e) throws XM
public void warning(String domain, String key, XMLParseException e) throws XMLParseException {
if (!nowarning) throw e;
- if (!usesNekoHtml(domain)) errors.add(e);
+ else add(e); // nowarning is set: do not throw, hand the warning to add() instead
}
-
}
diff --git a/lib/nekohtml.jar b/lib/nekohtml.jar
index 5b61ccf2185..d46601dcb5e 100644
Binary files a/lib/nekohtml.jar and b/lib/nekohtml.jar differ
diff --git a/test/html/test_node_encoding.rb b/test/html/test_node_encoding.rb
index e3716664fdc..d22fb3e74ec 100644
--- a/test/html/test_node_encoding.rb
+++ b/test/html/test_node_encoding.rb
@@ -21,6 +21,25 @@ def test_inner_html
contents = doc.at('h2').inner_html
assert_match hello, contents
end
+
+ def test_encoding_GH_1113
+ doc = Nokogiri::HTML::Document.new
+ hex = '🍀
'
+ decimal = '🍀
'
+ encoded = '🍀
'
+
+ doc.encoding = 'UTF-8'
+ expected = encoded
+ assert_equal expected, doc.fragment(hex).to_s
+ assert_equal expected, doc.fragment(decimal).to_s
+ assert_equal expected, doc.fragment(encoded).to_s
+
+ doc.encoding = 'US-ASCII'
+ expected = defined?(JRUBY_VERSION) ? hex : decimal
+ assert_equal expected, doc.fragment(hex).to_s
+ assert_equal expected, doc.fragment(decimal).to_s
+ assert_equal expected, doc.fragment(encoded).to_s
+ end
end
end
end