diff --git a/lib/html/proofer.rb b/lib/html/proofer.rb index fcbd08ba..967e0f7f 100644 --- a/lib/html/proofer.rb +++ b/lib/html/proofer.rb @@ -1,4 +1,6 @@ require 'nokogiri' +require 'find' +require 'html/proofer/checkable' require 'html/proofer/checks' module HTML @@ -24,7 +26,10 @@ def run end end - if !@failedTests.empty? + if @failedTests.empty? + puts "Tests executed sucesfully.".green + exit 0 + else # make the hash default to 0 so that += will work correctly count = Hash.new(0) @@ -41,7 +46,7 @@ def run end def self.create_nokogiri(path) - path << "index.html" if File.directory? path #support for Jekyll-style links + path << "/index.html" if File.directory? path #support for Jekyll-style links Nokogiri::HTML(File.read(path)) end diff --git a/lib/html/proofer/check.rb b/lib/html/proofer/check.rb index c92ffb15..b49d1a39 100644 --- a/lib/html/proofer/check.rb +++ b/lib/html/proofer/check.rb @@ -10,7 +10,7 @@ class HTML::Proofer::Checks class Check - attr_reader :issues, :hydra + attr_reader :issues, :hydra, :src, :path, :options, :additional_href_ignores def initialize(src, path, html, opts={}) @src = src @@ -28,31 +28,13 @@ def run end def add_issue(desc) - @issues << desc + @issues << "#{@path.blue}: #{desc}" end def output_filenames Dir[@site.config[:output_dir] + '/**/*'].select{ |f| File.file?(f) } end - def external_href?(href) - uri = URI.parse(href) - %w( http https ).include?(uri.scheme) - rescue URI::BadURIError - false - rescue URI::InvalidURIError - false - end - - def ignore_href?(href) - uri = URI.parse(href) - %w( mailto ).include?(uri.scheme) || @additional_href_ignores.include?(href) - rescue URI::BadURIError - false - rescue URI::InvalidURIError - false - end - def validate_url(href, issue_text) request = Typhoeus::Request.new(href, {:followlocation => true}) request.on_complete do |response| @@ -93,34 +75,5 @@ def self.subclasses classes end - def resolve_path(path) - - #Strip anchor, not needed to resolve file - path = path.split('#').first 
# lib/html/proofer/checkable.rb
require 'uri'

module HTML
  class Proofer
    # Wraps one HTML element and answers questions about the URL it carries
    # (taken from its `src` attribute, falling back to `href`).
    #
    # The element is only read through `obj[...]`, and the owning check is
    # only asked for #options, #src, #path and #additional_href_ignores.
    class Checkable

      # obj   - element-like object; 'src', 'href', 'alt', 'name' and 'id'
      #         are read from it with #[].
      # check - the check that owns this element.
      #
      # When the check's options contain :href_swap, every link => replace
      # pair is applied to the href with String#gsub.
      def initialize(obj, check)
        @src   = obj['src']
        @href  = obj['href']
        @alt   = obj['alt']
        @name  = obj['name']
        @id    = obj['id']
        @check = check

        # The swap table lives on the check; this object has no @options.
        swaps = @check.options[:href_swap]
        return unless @href && swaps

        swaps.each do |link, replace|
          @href = @href.gsub(link, replace)
        end
      end

      # Returns the src if present, otherwise the href, otherwise "".
      def url
        @src || @href || ""
      end

      # Returns true when the url can be parsed by URI.parse, false otherwise.
      def valid?
        URI.parse(url)
        true
      rescue URI::Error
        false
      end

      # Returns the parsed URI. Raises URI::InvalidURIError for a url that
      # URI.parse rejects, so callers should check #valid? first.
      def parts
        URI.parse(url)
      end

      # Path component of the url.
      def path
        parts.path
      end

      # Fragment of the url (the text after '#'), or nil when there is none.
      # NOTE(review): this shadows Object#hash, so instances should not be
      # used as Hash keys; the name is kept because callers rely on it.
      def hash
        parts.fragment
      end

      # Returns true when the url uses the http or https scheme.
      # An unparsable url is reported as not remote.
      def remote?
        %w( http https ).include?(URI.parse(url).scheme)
      rescue URI::BadURIError, URI::InvalidURIError
        false
      end

      # Returns true for mailto: urls and for hrefs listed in the check's
      # additional_href_ignores. An unparsable url is never ignored.
      def ignore?
        return true if %w( mailto ).include?(URI.parse(url).scheme)

        # Compare the raw @href: the base class has no #href reader.
        Array(@check.additional_href_ignores).include?(@href)
      rescue URI::BadURIError, URI::InvalidURIError
        false
      end

      # Returns true when the url is not a bare anchor.
      def external?
        !internal?
      end

      # Returns true when the url starts with '#'.
      def internal?
        url.start_with?("#")
      end

      # Resolves the url's path to a file path, or returns nil when the url
      # has no path. Raises like #parts for an unparsable url.
      def file_path
        return if path.nil?

        if path =~ /^\// # path relative to root
          base = @check.src
        elsif File.exist?(File.expand_path(path, @check.src)) # relative link, path is a file
          base = File.dirname(@check.path)
        else # relative link, path is a directory
          base = @check.path
        end

        file = File.join(base, path)

        # Implicit /index.html support. Extend the resolved +file+ rather
        # than the raw +path+, otherwise the base directory is lost.
        file = File.join(file, "index.html") if File.directory?(File.expand_path(file, @check.src))

        file
      end

      # Returns true when #absolute_path names something that exists.
      def exists?
        File.exist?(absolute_path)
      end

      # The resolved file path (or the check's own path when the url has
      # none), expanded against the current working directory.
      def absolute_path
        target = file_path || @check.path
        File.expand_path(target, Dir.pwd)
      end

    end
  end
end

# lib/html/proofer/checks/images.rb
# Checkable specialisation for <img> elements.
class Image < ::HTML::Proofer::Checkable

  SCREEN_SHOT_REGEX = /Screen(?: |%20)Shot(?: |%20)\d+-\d+-\d+(?: |%20)at(?: |%20)\d+.\d+.\d+/

  # Truthy when the alt attribute is present and non-empty.
  def valid_alt_tag?
    @alt && !@alt.empty?
  end

  # Returns true when the src matches SCREEN_SHOT_REGEX.
  def terrible_filename?
    !!(@src =~ SCREEN_SHOT_REGEX)
  end

  # The src attribute, or nil when it is missing or empty.
  def src
    @src unless @src.nil? || @src.empty?
  end

  # Returns true when there is no usable src attribute.
  def missing_src?
    !src
  end

end
# lib/html/proofer/checks/images.rb
# Reports problems with every <img> element of the document.
class Images < ::HTML::Proofer::Checks::Check

  # Walks all <img> elements and records an issue for each screenshot-style
  # filename, missing src, missing image file and missing alt attribute.
  # Remote images are handed to validate_url.
  def run
    @html.css('img').each do |node|
      img = Image.new node, self

      # Screenshot filenames are not valid URLs, so skip the remaining checks
      # for this image only. `next` is used because `return` would leave #run
      # and silently skip every later image.
      if img.terrible_filename?
        self.add_issue "image has a terrible filename (#{img.src})"
        next
      end

      # does the image exist?
      if img.missing_src?
        self.add_issue "image has no src attribute"
      elsif img.remote?
        validate_url img.src, "external image #{img.src} does not exist"
      else
        self.add_issue("internal image #{img.src} does not exist") unless img.exists?
      end

      # check alt tag
      self.add_issue "image #{img.src} does not have an alt attribute" unless img.valid_alt_tag?
    end
  end
end

# lib/html/proofer/checks/links.rb
# Checkable specialisation for <a> elements.
class Link < ::HTML::Proofer::Checkable

  # The href attribute, or nil when it is missing or empty.
  def href
    @href unless @href.nil? || @href.empty?
  end

  # Returns true when the element has neither a usable href nor a name or id.
  def missing_href?
    href.nil? && @name.nil? && @id.nil?
  end

end

# Reports problems with every <a> element of the document.
class Links < ::HTML::Proofer::Checks::Check

  # Walks all <a> elements. Ignored links are skipped. Issues are recorded
  # for a missing href, an unparsable URL, a missing local target and a
  # missing anchor in an existing local target. Remote links are handed to
  # validate_url; their anchors are not checked yet.
  def run
    @html.css('a').each do |node|
      link = Link.new node, self

      # Every early exit below uses `next`: `return` would leave #run and
      # skip all remaining links in the document.
      next if link.ignore?

      # is there even a href?
      if link.missing_href?
        self.add_issue "link has no href attribute"
        next
      end

      # is it even a valid URL?
      unless link.valid?
        self.add_issue "#{link.href} is an invalid URL"
        next
      end

      # does the file even exist?
      if link.remote?
        validate_url link.href, "externally linking to #{link.href}, which does not exist"
        next # remote anchors are not yet checked
      end

      target_exists = link.exists?
      self.add_issue "internally linking to #{link.href}, which does not exist" unless target_exists

      # verify the target hash
      next unless link.hash

      if link.internal?
        self.add_issue "linking to internal hash ##{link.hash} that does not exist" unless hash_check @html, link.hash
      elsif target_exists
        # Only open the target when it exists; reading a missing file raises.
        target_html = HTML::Proofer.create_nokogiri link.absolute_path
        self.add_issue "linking to #{link.href}, but #{link.hash} does not exist" unless hash_check target_html, link.hash
      end
    end
  end
end
- + -