diff --git a/CHANGELOG.ja.rdoc b/CHANGELOG.ja.rdoc index 7a1e5c90191..afa21dc9dd3 100644 --- a/CHANGELOG.ja.rdoc +++ b/CHANGELOG.ja.rdoc @@ -11,7 +11,7 @@ * (MRI) extconf.rb のオプション --use-system-libraries を追加 環境変数 NOKOGIRI_USE_SYSTEM_LIBRARIES を設定する代わりに使える * (MRI) 同梱の libxslt を 1.1.28 に、 zlib を 1.2.8 に、 libiconv を 1.14 にそれぞれ更新した - * Nokogiri::HTML::Document#title= は、head要素がない場合でも常に、最適な場所を探しつつtitle要素を追加するようになった + * Nokogiri::HTML::Document#title= 及び #meta_encoding= は、head要素がない場合でも常に、最適な場所を探しつつ要素を追加するようになった * Nokogiri::XML::DTD#html_dtd? と #html5_dtd? を追加 * Nokogiri::XML::Node#prepend_child を追加 #664 diff --git a/CHANGELOG.rdoc b/CHANGELOG.rdoc index fa2b4df607e..fb1535b38bc 100644 --- a/CHANGELOG.rdoc +++ b/CHANGELOG.rdoc @@ -18,9 +18,9 @@ setting the environment variable NOKOGIRI_USE_SYSTEM_LIBRARIES. * (MRI) Update packaged libraries: libxslt to 1.1.28, zlib to 1.2.8, and libiconv to 1.14, respectively. - * Nokogiri::HTML::Document#title= now always add a title element - even if a head element is not present, trying hard to find the - best place to put it. + * Nokogiri::HTML::Document#title= and #meta_encoding= now always add + an element if not present, trying hard to find the best place to + put it. * Nokogiri::XML::DTD#html_dtd? and #html5_dtd? are added. * Nokogiri::XML::Node#prepend_child is added. #664 diff --git a/lib/nokogiri/html/document.rb b/lib/nokogiri/html/document.rb index f06c08d60a1..1b26b8482f1 100644 --- a/lib/nokogiri/html/document.rb +++ b/lib/nokogiri/html/document.rb @@ -14,11 +14,42 @@ def meta_encoding end ### - # Set the meta tag encoding for this document. If there is no meta - # content tag, the encoding is not set. + # Set the meta tag encoding for this document. + # + # If a meta encoding tag is already present, its content is + # replaced with the given text. 
+ # + # Otherwise, this method tries to create one at an appropriate + # place supplying head and/or html elements as necessary, which + # is inside a head element if any, and before any text node or + # content element (typically <body>) if any. + # + # The result when trying to set an encoding that is different + # from the document encoding is undefined. def meta_encoding= encoding - meta = meta_content_type and - meta['content'] = "text/html; charset=%s" % encoding + case + when meta = meta_content_type + meta['content'] = 'text/html; charset=%s' % encoding + encoding + when meta = at('//meta[@charset]') + meta['charset'] = encoding + else + meta = XML::Node.new('meta', self) + if dtd = internal_subset and dtd.html5_dtd? + meta['charset'] = encoding + else + meta['http-equiv'] = 'Content-Type' + meta['content'] = 'text/html; charset=%s' % encoding + end + + case + when head = at('//head') + head.prepend_child(meta) + else + set_metadata_element(meta) + end + encoding + end end def meta_content_type @@ -60,8 +91,19 @@ def title=(text) when meta = at('//meta[@charset]') || meta_content_type # better put after charset declaration meta.add_next_sibling(title) + else + set_metadata_element(title) + end + text + end + + def set_metadata_element(element) + case + when head = at('//head') + head << element when html = at('//html') - html.prepend_child(XML::Node.new('head', self) << title) + head = html.prepend_child(XML::Node.new('head', self)) + head.prepend_child(element) when first = children.find { |node| case node when XML::Element, XML::Text @@ -71,12 +113,14 @@ def title=(text) # We reach here only if the underlying document model # allows <html>/<head> elements to be omitted and does not # automatically supply them. 
- first.add_previous_sibling(title) + first.add_previous_sibling(element) else - add_child(XML::Node.new('html', self) << (XML::Node.new('head', self) << title)) + html = add_child(XML::Node.new('html', self)) + head = html.add_child(XML::Node.new('head', self)) + head.prepend_child(element) end - text end + private :set_metadata_element #### # Serialize Node using +options+. Save options can also be set using a diff --git a/test/html/test_document.rb b/test/html/test_document.rb index 621972ab4f9..626bdf14cbe 100644 --- a/test/html/test_document.rb +++ b/test/html/test_document.rb @@ -264,11 +264,33 @@ def test_title=() end def test_meta_encoding_without_head - html = Nokogiri::HTML('foo') + encoding = 'EUC-JP' + html = Nokogiri::HTML('foo', nil, encoding) + assert_nil html.meta_encoding - html.meta_encoding = 'EUC-JP' + html.meta_encoding = encoding + assert_equal encoding, html.meta_encoding + + meta = html.at('/html/head/meta[@http-equiv and boolean(@content)]') + assert meta, 'meta is in head' + + assert meta.at('./parent::head/following-sibling::body'), 'meta is before body' + end + + def test_html5_meta_encoding_without_head + encoding = 'EUC-JP' + html = Nokogiri::HTML('foo', nil, encoding) + assert_nil html.meta_encoding + + html.meta_encoding = encoding + assert_equal encoding, html.meta_encoding + + meta = html.at('/html/head/meta[@charset]') + assert meta, 'meta is in head' + + assert meta.at('./parent::head/following-sibling::body'), 'meta is before body' end def test_meta_encoding_with_empty_content_type