diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0493b07170d..fdd157d8625 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,13 @@ Nokogiri follows [Semantic Versioning](https://semver.org/), please see the [REA
---
+## next / unreleased
+
+### Improved
+
+* [CRuby] `Node#line` is no longer capped at 65535. libxml v2.9.0 and later support a new parse option, exposed as `Nokogiri::XML::ParseOptions::PARSE_BIG_LINES` and set in `ParseOptions::DEFAULT_XML`, `::DEFAULT_XSLT`, `::DEFAULT_HTML`, and `::DEFAULT_SCHEMA`. (Note that JRuby never had this problem.) [[#1764](https://github.com/sparklemotion/nokogiri/issues/1764), [#1493](https://github.com/sparklemotion/nokogiri/issues/1493), [#1617](https://github.com/sparklemotion/nokogiri/issues/1617), [#1505](https://github.com/sparklemotion/nokogiri/issues/1505), [#1003](https://github.com/sparklemotion/nokogiri/issues/1003), [#533](https://github.com/sparklemotion/nokogiri/issues/533)]
+
+
## 1.12.3 / 2021-08-10
### Fixed
diff --git a/ext/nokogiri/xml_node.c b/ext/nokogiri/xml_node.c
index c28fb751a9b..545a39b1066 100644
--- a/ext/nokogiri/xml_node.c
+++ b/ext/nokogiri/xml_node.c
@@ -1375,12 +1375,12 @@ native_write_to(
* Returns the line for this Node
*/
static VALUE
-line(VALUE self)
+rb_xml_node_line(VALUE rb_node)
{
- xmlNodePtr node;
- Data_Get_Struct(self, xmlNode, node);
+ xmlNodePtr c_node;
+ Data_Get_Struct(rb_node, xmlNode, c_node);
- return INT2NUM(xmlGetLineNo(node));
+ return INT2NUM(xmlGetLineNo(c_node));
}
/*
@@ -1390,17 +1390,25 @@ line(VALUE self)
* Sets the line for this Node. num must be less than 65535.
*/
static VALUE
-set_line(VALUE self, VALUE num)
+rb_xml_node_line_set(VALUE rb_node, VALUE rb_line_number)
{
- xmlNodePtr node;
- int value = NUM2INT(num);
+ xmlNodePtr c_node;
+ int line_number = NUM2INT(rb_line_number);
- Data_Get_Struct(self, xmlNode, node);
- if (value < 65535) {
- node->line = value;
+ Data_Get_Struct(rb_node, xmlNode, c_node);
+
+ // libxml2 optionally uses xmlNode.psvi to store longer line numbers, but only for text nodes.
+ // search for "psvi" in SAX2.c and tree.c to learn more.
+ if (line_number < 65535) {
+ c_node->line = (short) line_number;
+ } else {
+ c_node->line = 65535;
+ if (c_node->type == XML_TEXT_NODE) {
+ c_node->psvi = (void *)(ptrdiff_t) line_number;
+ }
}
- return num;
+ return rb_line_number;
}
/*
@@ -1805,8 +1813,8 @@ noko_init_xml_node()
rb_define_method(cNokogiriXmlNode, "create_internal_subset", create_internal_subset, 3);
rb_define_method(cNokogiriXmlNode, "create_external_subset", create_external_subset, 3);
rb_define_method(cNokogiriXmlNode, "pointer_id", pointer_id, 0);
- rb_define_method(cNokogiriXmlNode, "line", line, 0);
- rb_define_method(cNokogiriXmlNode, "line=", set_line, 1);
+ rb_define_method(cNokogiriXmlNode, "line", rb_xml_node_line, 0);
+ rb_define_method(cNokogiriXmlNode, "line=", rb_xml_node_line_set, 1);
rb_define_method(cNokogiriXmlNode, "content", get_native_content, 0);
rb_define_method(cNokogiriXmlNode, "native_content=", set_native_content, 1);
rb_define_method(cNokogiriXmlNode, "lang", get_lang, 0);
diff --git a/lib/nokogiri/xml/parse_options.rb b/lib/nokogiri/xml/parse_options.rb
index 36186ab7b91..12019198ab3 100644
--- a/lib/nokogiri/xml/parse_options.rb
+++ b/lib/nokogiri/xml/parse_options.rb
@@ -68,15 +68,17 @@ class ParseOptions
NOBASEFIX = 1 << 18
# relax any hardcoded limit from the parser
HUGE = 1 << 19
+ # line numbers stored as long int (instead of a short int)
+ BIG_LINES = 1 << 22
# the default options used for parsing XML documents
- DEFAULT_XML = RECOVER | NONET
+ DEFAULT_XML = RECOVER | NONET | BIG_LINES
# the default options used for parsing XSLT stylesheets
- DEFAULT_XSLT = RECOVER | NONET | NOENT | DTDLOAD | DTDATTR | NOCDATA
+ DEFAULT_XSLT = RECOVER | NONET | NOENT | DTDLOAD | DTDATTR | NOCDATA | BIG_LINES
# the default options used for parsing HTML documents
- DEFAULT_HTML = RECOVER | NOERROR | NOWARNING | NONET
+ DEFAULT_HTML = RECOVER | NOERROR | NOWARNING | NONET | BIG_LINES
# the default options used for parsing XML schemas
- DEFAULT_SCHEMA = NONET
+ DEFAULT_SCHEMA = NONET | BIG_LINES
attr_accessor :options
def initialize options = STRICT
diff --git a/test/xml/test_node.rb b/test/xml/test_node.rb
index f125d41d54c..48cec9b2971 100644
--- a/test/xml/test_node.rb
+++ b/test/xml/test_node.rb
@@ -1258,6 +1258,29 @@ def test_wrap
assert_equal(2, set[0].line)
assert_equal(5, set[1].line)
end
+
+ it "supports a line number greater than a short int" do
+ max_short_int = (1 << 16) - 1
+ xml = StringIO.new.tap do |io|
+ io << ""
+ max_short_int.times do |j|
+ io << "#{j}\n"
+ io << "#{j}\n"
+ end
+ io << "\n"
+ io << ""
+ end.string
+
+ if Nokogiri.uses_libxml?
+ doc = Nokogiri::XML(xml) { |c| c.nobig_lines }
+ node = doc.at_css("x")
+ assert_operator(node.line, :==, max_short_int)
+ end
+
+ doc = Nokogiri::XML(xml)
+ node = doc.at_css("x")
+ assert_operator(node.line, :>, max_short_int)
+ end
end
describe "#line=" do
@@ -1268,6 +1291,18 @@ def test_wrap
node.line = 54321
assert_equal(54321, node.line)
end
+
+ it "supports a line number greater than a short int for text nodes" do
+ skip_unless_libxml2("Xerces does not have line numbers for nodes")
+
+ max_short_int = (1 << 16) - 1
+ line_number = max_short_int + 100
+
+ document = Nokogiri::XML::Document.parse("text node")
+ node = document.at_css("a").children.first
+ node.line = line_number
+ assert_equal(line_number, node.line)
+ end
end
end
end