From ad44cd4a3d6409e972f2c27f237facb6b5d7f2d9 Mon Sep 17 00:00:00 2001 From: Ary Borenszweig Date: Wed, 20 Sep 2017 10:02:16 -0300 Subject: [PATCH] Let HTML.escape only escape &<>"' --- spec/std/html_spec.cr | 16 ++-------------- src/html.cr | 43 +++++++++++++++++-------------------------- 2 files changed, 19 insertions(+), 40 deletions(-) diff --git a/spec/std/html_spec.cr b/spec/std/html_spec.cr index 374cd454af72..fe672758a716 100644 --- a/spec/std/html_spec.cr +++ b/spec/std/html_spec.cr @@ -10,21 +10,9 @@ describe "HTML" do end it "escapes dangerous characters from a string" do - str = HTML.escape("< & >") + str = HTML.escape("< & > ' \"") - str.should eq("&lt; &amp; &gt;") - end - - it "escapes javascript example from a string" do - str = HTML.escape("<script>alert('You are being hacked')</script>") - - str.should eq("&lt;script&gt;alert&#40;&#39;You are being hacked&#39;&#41;&lt;/script&gt;") - end - - it "escapes nonbreakable space but not normal space" do - str = HTML.escape("nbsp space ") - - str.should eq("nbsp&nbsp;space ") + str.should eq("&lt; &amp; &gt; &#39; &quot;") end end diff --git a/src/html.cr b/src/html.cr index 708e40b1684c..b1c1c272ed35 100644 --- a/src/html.cr +++ b/src/html.cr @@ -1,28 +1,15 @@ -# Handles encoding and decoding of HTML entities. +# Provides HTML escaping and unescaping methods. module HTML - SUBSTITUTIONS = { - '!' => "&#33;", - '"' => "&quot;", - '$' => "&#36;", - '%' => "&#37;", - '&' => "&amp;", - '\'' => "&#39;", - '(' => "&#40;", - ')' => "&#41;", - '=' => "&#61;", - '>' => "&gt;", - '<' => "&lt;", - '+' => "&#43;", - '@' => "&#64;", - '[' => "&#91;", - ']' => "&#93;", - '`' => "&#96;", - '{' => "&#123;", - '}' => "&#125;", - '\u{a0}' => "&nbsp;", + private SUBSTITUTIONS = { + '&' => "&amp;", + '<' => "&lt;", + '>' => "&gt;", + '"' => "&quot;", + '\'' => "&#39;", } - # Encodes a string with HTML entity substitutions. + # Escapes special characters in HTML, namely + # `&`, `<`, `>`, `"` and `'`. # # ``` # require "html" @@ -33,25 +20,29 @@ module HTML string.gsub(SUBSTITUTIONS) end - # Encodes a string to HTML, but writes to the `IO` instance provided. 
+ # Same as `escape(string)` but outputs the result to + # the given *io*. # # ``` # io = IO::Memory.new # HTML.escape("Crystal & You", io) # => nil # io.to_s # => "Crystal &amp; You" # ``` - def self.escape(string : String, io : IO) + def self.escape(string : String, io : IO) : Nil string.each_char do |char| io << SUBSTITUTIONS.fetch(char, char) end end - # Decodes a string that contains HTML entities. + # Returns a string where some named and all numeric character references + # (e.g. &gt;, &#62;, &#x3e;) in *string* are replaced with the corresponding + # unicode characters. Only these named entities are replaced: + # apos, amp, quot, gt, lt and nbsp. # # ``` # HTML.unescape("Crystal &amp; You") # => "Crystal & You" # ``` - def self.unescape(string : String) + def self.unescape(string : String) : String return string unless string.includes? '&' string.gsub(/&(apos|amp|quot|gt|lt|nbsp|\#[0-9]+|\#[xX][0-9A-Fa-f]+);/) do |string, _match|