From 83cc451c3f29df397caa890afc3b714eae6ab8f7 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Fri, 6 May 2022 21:57:41 -0400 Subject: [PATCH] fix: {HTML4,XML}::SAX::{Parser,ParserContext} check arg types Previously, arguments of the wrong type might cause segfault on CRuby. --- ext/java/nokogiri/Html4SaxParserContext.java | 11 +++++++++++ ext/java/nokogiri/XmlSaxParserContext.java | 7 +++++-- ext/java/nokogiri/internals/ParserContext.java | 8 +++++++- ext/nokogiri/html4_sax_parser_context.c | 5 ++--- ext/nokogiri/xml_sax_parser_context.c | 13 ++++++++++--- lib/nokogiri/html4/sax/parser.rb | 2 +- test/html4/sax/test_parser.rb | 8 +++++++- test/html4/sax/test_parser_context.rb | 9 +++++++++ test/xml/sax/test_parser.rb | 8 +++++++- test/xml/sax/test_parser_context.rb | 7 +++++++ 10 files changed, 66 insertions(+), 12 deletions(-) diff --git a/ext/java/nokogiri/Html4SaxParserContext.java b/ext/java/nokogiri/Html4SaxParserContext.java index b03217ec6e3..91f5b0a58e1 100644 --- a/ext/java/nokogiri/Html4SaxParserContext.java +++ b/ext/java/nokogiri/Html4SaxParserContext.java @@ -231,6 +231,13 @@ static EncodingType get(final int ordinal) IRubyObject data, IRubyObject encoding) { + if (!(data instanceof RubyString)) { + throw context.getRuntime().newTypeError("data must be kind_of String"); + } + if (!(encoding instanceof RubyString)) { + throw context.getRuntime().newTypeError("data must be kind_of String"); + } + Html4SaxParserContext ctx = Html4SaxParserContext.newInstance(context.runtime, (RubyClass) klass); ctx.setInputSourceFile(context, data); String javaEncoding = findEncodingName(context, encoding); @@ -247,6 +254,10 @@ static EncodingType get(final int ordinal) IRubyObject data, IRubyObject encoding) { + if (!(encoding instanceof RubyFixnum)) { + throw context.getRuntime().newTypeError("encoding must be kind_of String"); + } + Html4SaxParserContext ctx = Html4SaxParserContext.newInstance(context.runtime, (RubyClass) klass); ctx.setIOInputSource(context, data, context.nil); String javaEncoding = findEncodingName(context, encoding); diff --git a/ext/java/nokogiri/XmlSaxParserContext.java b/ext/java/nokogiri/XmlSaxParserContext.java index 573c069740b..8cca652ca0d 100644 --- a/ext/java/nokogiri/XmlSaxParserContext.java +++ b/ext/java/nokogiri/XmlSaxParserContext.java @@ -130,9 +130,12 @@ public class XmlSaxParserContext extends ParserContext parse_io(ThreadContext context, IRubyObject klazz, IRubyObject data, - IRubyObject enc) + IRubyObject encoding) { - //int encoding = (int)enc.convertToInteger().getLongValue(); + // check the type of the unused encoding to match behavior of CRuby + if (!(encoding instanceof RubyFixnum)) { + throw context.getRuntime().newTypeError("encoding must be kind_of String"); + } final Ruby runtime = context.runtime; XmlSaxParserContext ctx = newInstance(runtime, (RubyClass) klazz); ctx.initialize(runtime); diff --git a/ext/java/nokogiri/internals/ParserContext.java b/ext/java/nokogiri/internals/ParserContext.java index 6eda7be5e2c..b8d393a5204 100644 --- a/ext/java/nokogiri/internals/ParserContext.java +++ b/ext/java/nokogiri/internals/ParserContext.java @@ -58,6 +58,12 @@ public abstract class ParserContext extends RubyObject source = new InputSource(); ParserContext.setUrl(context, source, url); + Ruby ruby = context.getRuntime(); + + if (!(data.respondsTo("read"))) { + throw ruby.newTypeError("must respond to :read"); + } + source.setByteStream(new IOInputStream(data)); if (java_encoding != null) { source.setEncoding(java_encoding); @@ -73,7 +79,7 @@ public abstract class ParserContext extends RubyObject Ruby ruby = context.getRuntime(); if (!(data instanceof RubyString)) { - throw ruby.newArgumentError("must be kind_of String"); + throw ruby.newTypeError("must be kind_of String"); } RubyString stringData = (RubyString) data; diff --git a/ext/nokogiri/html4_sax_parser_context.c b/ext/nokogiri/html4_sax_parser_context.c index dec0d88fe5d..54adca4cb45 100644 --- a/ext/nokogiri/html4_sax_parser_context.c +++ b/ext/nokogiri/html4_sax_parser_context.c @@ -19,9 +19,8 @@ parse_memory(VALUE klass, VALUE data, VALUE encoding) { htmlParserCtxtPtr ctxt; - if (NIL_P(data)) { - rb_raise(rb_eArgError, "data cannot be nil"); - } + Check_Type(data, T_STRING); + if (!(int)RSTRING_LEN(data)) { rb_raise(rb_eRuntimeError, "data cannot be empty"); } diff --git a/ext/nokogiri/xml_sax_parser_context.c b/ext/nokogiri/xml_sax_parser_context.c index 60449347ae1..60c491984ba 100644 --- a/ext/nokogiri/xml_sax_parser_context.c +++ b/ext/nokogiri/xml_sax_parser_context.c @@ -2,6 +2,8 @@ VALUE cNokogiriXmlSaxParserContext ; +static ID id_read; + static void deallocate(xmlParserCtxtPtr ctxt) { @@ -26,6 +28,10 @@ parse_io(VALUE klass, VALUE io, VALUE encoding) xmlParserCtxtPtr ctxt; xmlCharEncoding enc = (xmlCharEncoding)NUM2INT(encoding); + if (!rb_respond_to(io, id_read)) { + rb_raise(rb_eTypeError, "argument expected to respond to :read"); + } + ctxt = xmlCreateIOParserCtxt(NULL, NULL, (xmlInputReadCallback)noko_io_read, (xmlInputCloseCallback)noko_io_close, @@ -62,9 +68,8 @@ parse_memory(VALUE klass, VALUE data) { xmlParserCtxtPtr ctxt; - if (NIL_P(data)) { - rb_raise(rb_eArgError, "data cannot be nil"); - } + Check_Type(data, T_STRING); + if (!(int)RSTRING_LEN(data)) { rb_raise(rb_eRuntimeError, "data cannot be empty"); } @@ -278,4 +283,6 @@ noko_init_xml_sax_parser_context() rb_define_method(cNokogiriXmlSaxParserContext, "recovery", get_recovery, 0); rb_define_method(cNokogiriXmlSaxParserContext, "line", line, 0); rb_define_method(cNokogiriXmlSaxParserContext, "column", column, 0); + + id_read = rb_intern("read"); } diff --git a/lib/nokogiri/html4/sax/parser.rb b/lib/nokogiri/html4/sax/parser.rb index 2a074b6b925..1f8c03af6c4 100644 --- a/lib/nokogiri/html4/sax/parser.rb +++ b/lib/nokogiri/html4/sax/parser.rb @@ -28,7 +28,7 @@ class Parser < Nokogiri::XML::SAX::Parser ### # Parse html stored in +data+ using +encoding+ def parse_memory(data, encoding = "UTF-8") - raise ArgumentError unless data + raise TypeError unless String === data return if data.empty? ctx = ParserContext.memory(data, encoding) diff --git a/test/html4/sax/test_parser.rb b/test/html4/sax/test_parser.rb index 3917dce2e74..4dc6ac61fd0 100644 --- a/test/html4/sax/test_parser.rb +++ b/test/html4/sax/test_parser.rb @@ -54,7 +54,7 @@ def test_parse_file_with_dir end def test_parse_memory_nil - assert_raises(ArgumentError) do + assert_raises(TypeError) do @parser.parse_memory(nil) end end @@ -161,6 +161,12 @@ def test_parsing_dom_error_from_io def test_empty_processing_instruction @parser.parse_memory("this will segfault") end + + it "handles invalid types gracefully" do + assert_raises(TypeError) { Nokogiri::HTML::SAX::Parser.new.parse(0xcafecafe) } + assert_raises(TypeError) { Nokogiri::HTML::SAX::Parser.new.parse_memory(0xcafecafe) } + assert_raises(TypeError) { Nokogiri::HTML::SAX::Parser.new.parse_io(0xcafecafe) } + end end end end diff --git a/test/html4/sax/test_parser_context.rb b/test/html4/sax/test_parser_context.rb index 0a0eda64c89..594a3ac8215 100644 --- a/test/html4/sax/test_parser_context.rb +++ b/test/html4/sax/test_parser_context.rb @@ -40,6 +40,15 @@ def test_from_file ctx.parse_with(parser) # end end + + def test_graceful_handling_of_invalid_types + assert_raises(TypeError) { ParserContext.new(0xcafecafe) } + assert_raises(TypeError) { ParserContext.memory(0xcafecafe, "UTF-8") } + assert_raises(TypeError) { ParserContext.io(0xcafecafe, 1) } + assert_raises(TypeError) { ParserContext.io(StringIO.new("asdf"), "should be an index into ENCODINGS") } + assert_raises(TypeError) { ParserContext.file(0xcafecafe, "UTF-8") } + assert_raises(TypeError) { ParserContext.file("path/to/file", 0xcafecafe) } + end end end end diff --git a/test/xml/sax/test_parser.rb b/test/xml/sax/test_parser.rb index db7b11d2f67..4a847529db0 100644 --- a/test/xml/sax/test_parser.rb +++ b/test/xml/sax/test_parser.rb @@ -71,6 +71,12 @@ class Nokogiri::SAX::TestCase end end + it "handles invalid types gracefully" do + assert_raises(TypeError) { Nokogiri::XML::SAX::Parser.new.parse(0xcafecafe) } + assert_raises(TypeError) { Nokogiri::XML::SAX::Parser.new.parse_memory(0xcafecafe) } + assert_raises(TypeError) { Nokogiri::XML::SAX::Parser.new.parse_io(0xcafecafe) } + end + it :test_namespace_declaration_order_is_saved do parser.parse(<<~EOF) @@ -261,7 +267,7 @@ def call_parse_io_with_encoding(encoding) end it :test_render_parse_nil_param do - assert_raises(ArgumentError) { parser.parse_memory(nil) } + assert_raises(TypeError) { parser.parse_memory(nil) } end it :test_bad_encoding_args do diff --git a/test/xml/sax/test_parser_context.rb b/test/xml/sax/test_parser_context.rb index e38efd271cb..ba985794393 100644 --- a/test/xml/sax/test_parser_context.rb +++ b/test/xml/sax/test_parser_context.rb @@ -80,6 +80,13 @@ def test_recovery assert(pc.recovery) end + def test_graceful_handling_of_invalid_types + assert_raises(TypeError) { ParserContext.new(0xcafecafe) } + assert_raises(TypeError) { ParserContext.memory(0xcafecafe) } + assert_raises(TypeError) { ParserContext.io(0xcafecafe, 1) } + assert_raises(TypeError) { ParserContext.io(StringIO.new("asdf"), "should be an index into ENCODINGS") } + end + def test_from_io ctx = ParserContext.new(StringIO.new("fo"), "UTF-8") assert(ctx)