Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor #6

Merged
merged 12 commits into from
Oct 14, 2013
11 changes: 8 additions & 3 deletions lib/html/proofer.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
require 'nokogiri'
require 'find'
require 'html/proofer/checkable'
require 'html/proofer/checks'

module HTML
Expand All @@ -16,15 +18,18 @@ def run
Find.find(@srcDir) do |path|
if File.extname(path) == @options[:ext]
html = HTML::Proofer.create_nokogiri(path)
check = klass.new(path, html, @options)
check = klass.new(@srcDir, path, html, @options)
check.run
check.hydra.run
self.print_issues(klass, check.issues)
end
end
end

if !@failedTests.empty?
if @failedTests.empty?
puts "Tests executed sucesfully.".green
exit 0
else
# make the hash default to 0 so that += will work correctly
count = Hash.new(0)

Expand All @@ -41,7 +46,7 @@ def run
end

def self.create_nokogiri(path)
path << "index.html" if File.directory? path #support for Jekyll-style links
path << "/index.html" if File.directory? path #support for Jekyll-style links
Nokogiri::HTML(File.read(path))
end

Expand Down
26 changes: 5 additions & 21 deletions lib/html/proofer/check.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,10 @@ class HTML::Proofer::Checks

class Check

attr_reader :issues, :hydra
attr_reader :issues, :hydra, :src, :path, :options, :additional_href_ignores

def initialize(path, html, opts={})
def initialize(src, path, html, opts={})
@src = src
@path = path
@html = html
@options = opts
Expand All @@ -27,31 +28,13 @@ def run
end

def add_issue(desc)
@issues << desc
@issues << "#{@path.blue}: #{desc}"
end

def output_filenames
Dir[@site.config[:output_dir] + '/**/*'].select{ |f| File.file?(f) }
end

def external_href?(href)
uri = URI.parse(href)
%w( http https ).include?(uri.scheme)
rescue URI::BadURIError
false
rescue URI::InvalidURIError
false
end

def ignore_href?(href)
uri = URI.parse(href)
%w( mailto ).include?(uri.scheme) || @additional_href_ignores.include?(href)
rescue URI::BadURIError
false
rescue URI::InvalidURIError
false
end

def validate_url(href, issue_text)
request = Typhoeus::Request.new(href, {:followlocation => true})
request.on_complete do |response|
Expand Down Expand Up @@ -91,5 +74,6 @@ def self.subclasses

classes
end

end
end
106 changes: 106 additions & 0 deletions lib/html/proofer/checkable.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
module HTML
  class Proofer
    # Wraps one HTML element together with the check that is inspecting it and
    # answers questions about the URL the element points at (its +src+ or
    # +href+): is it parseable, remote, ignorable, and does the file it refers
    # to exist on disk?
    #
    # +obj+ only needs to respond to +[]+ with the attribute names read below.
    # +check+ must respond to +src+, +path+, +options+ and
    # +additional_href_ignores+.
    class Checkable

      def initialize(obj, check)
        @src   = obj['src']
        @href  = obj['href']
        @alt   = obj['alt']
        @name  = obj['name']
        @id    = obj['id']
        @check = check

        # The swap table belongs to the owning check; this object has no
        # @options of its own.
        swaps = @check.options[:href_swap]
        if @href && swaps
          swaps.each do |link, replace|
            @href = @href.gsub(link, replace)
          end
        end
      end

      # The URL under test: +src+ wins over +href+; never nil.
      def url
        @src || @href || ""
      end

      # True when the URL can be parsed by URI.parse, false otherwise.
      def valid?
        URI.parse url
        true
      rescue StandardError
        false
      end

      # Parsed form of the URL. Raises whatever URI.parse raises for an
      # unparseable URL, so callers should consult #valid? first.
      def parts
        URI.parse url
      end

      # Path component of the URL ("" for an anchor-only URL).
      def path
        parts.path
      end

      # Fragment of the URL (the text after "#"), or nil when there is none.
      # NOTE(review): this shadows Object#hash; the name is kept because
      # callers already use it.
      def hash
        parts.fragment
      end

      # path is to an external server
      def remote?
        uri = URI.parse url
        %w( http https ).include?(uri.scheme)
      rescue URI::BadURIError, URI::InvalidURIError
        false
      end

      # True for mailto: URLs and for hrefs listed in the check's
      # additional_href_ignores. Unparseable URLs are never ignored.
      def ignore?
        uri = URI.parse url
        # @href is used directly: the base class defines no +href+ reader.
        %w( mailto ).include?(uri.scheme) || @check.additional_href_ignores.include?(@href)
      rescue URI::BadURIError, URI::InvalidURIError
        false
      end

      # path is external to the file
      def external?
        !internal?
      end

      # path is an anchor
      def internal?
        url[0] == "#"
      end

      # File-system location the URL refers to, or nil when the URL has no
      # path of its own (anchor-only or empty URL), meaning "the current file".
      def file_path
        target = path
        return if target.nil? || target.empty?

        if target.start_with?("/") #path relative to root
          base = @check.src
        elsif File.exist? File.expand_path(target, @check.src) #relative links, path is a file
          base = File.dirname @check.path
        else #relative link, path is a directory
          base = @check.path
        end

        file = File.join base, target

        # implicit /index.html support, with support for trailing slashes;
        # append to the resolved file so the base directory is kept
        file = File.join file, "index.html" if File.directory? File.expand_path(file, @check.src)

        file
      end

      # checks if a file exists relative to the current pwd
      def exists?
        File.exist? absolute_path
      end

      # Absolute form of #file_path, falling back to the file being checked
      # when the URL has no path of its own.
      def absolute_path
        target = file_path || @check.path
        File.expand_path target, Dir.pwd
      end

    end
  end
end
52 changes: 36 additions & 16 deletions lib/html/proofer/checks/images.rb
Original file line number Diff line number Diff line change
@@ -1,30 +1,50 @@
# encoding: utf-8

# An <img> element under inspection. Adds image-specific questions on top of
# the generic URL helpers inherited from Checkable.
class Image < ::HTML::Proofer::Checkable

  # Matches default screenshot names such as
  # "Screen Shot 2013-10-14 at 10.30.45", with the spaces either literal or
  # encoded as %20. The dots in the time part are unescaped, so any single
  # character is accepted between the digit groups.
  SCREEN_SHOT_REGEX = /Screen(?: |%20)Shot(?: |%20)\d+-\d+-\d+(?: |%20)at(?: |%20)\d+.\d+.\d+/

  # Truthy when the element carries a non-empty alt attribute.
  def valid_alt_tag?
    @alt && !@alt.empty?
  end

  # Truthy when the src looks like an unrenamed screenshot file.
  def terrible_filename?
    @src =~ SCREEN_SHOT_REGEX
  end

  # The src attribute, or nil when it is absent or empty.
  def src
    @src unless @src.nil? || @src.empty?
  end

  # True when there is no usable src attribute.
  def missing_src?
    !src
  end

end

# Check that inspects every <img> element of the parsed document: the image
# must have a src that resolves (locally or remotely), a non-empty alt
# attribute, and a filename that is not a default screenshot name.
class Images < ::HTML::Proofer::Checks::Check

  def run
    @html.css('img').each do |node|
      img = Image.new node, self

      # Screenshot filenames are reported and the remaining checks skipped for
      # this image only; `next` keeps the loop going for the other images,
      # whereas `return` would abandon them.
      if img.terrible_filename?
        add_issue "image has a terrible filename (#{img.src})"
        next
      end

      # does the image exist?
      if img.missing_src?
        add_issue "image has no src attribute"
      elsif img.remote?
        validate_url img.src, "external image #{img.src} does not exist"
      else
        add_issue("internal image #{img.src} does not exist") unless img.exists?
      end

      # check alt tag
      add_issue "image #{img.src} does not have an alt attribute" unless img.valid_alt_tag?
    end
  end
end
84 changes: 35 additions & 49 deletions lib/html/proofer/checks/links.rb
Original file line number Diff line number Diff line change
@@ -1,57 +1,48 @@
# encoding: utf-8

# An <a> element under inspection.
class Link < ::HTML::Proofer::Checkable

  # The href attribute, or nil when it is absent or empty.
  def href
    return nil if @href.nil? || @href.empty?

    @href
  end

  # True only when the element has no usable href and is not a named or
  # id-carrying anchor either.
  def missing_href?
    [href, @name, @id].all?(&:nil?)
  end

end

# Check that inspects every <a> element of the parsed document: the link must
# have a target, the target must be a parseable URL that resolves (locally or
# remotely), and any fragment must name an existing id/name in the target.
class Links < ::HTML::Proofer::Checks::Check

  def run
    @html.css('a').each do |node|
      link = Link.new node, self

      # `next` (not `return`) so one skipped or broken link does not stop the
      # remaining links from being checked.
      next if link.ignore?

      # is there even a href?
      if link.missing_href?
        add_issue "link has no href attribute"
        next
      end

      # is it even a valid URL?
      unless link.valid?
        add_issue "#{link.href} is an invalid URL"
        next
      end

      # does the file even exist?
      target_missing = false
      if link.remote?
        validate_url link.href, "externally linking to #{link.href}, which does not exist"
      elsif !link.exists?
        add_issue "internally linking to #{link.href}, which does not exist"
        target_missing = true
      end

      # verify the target hash
      next unless link.hash

      if link.remote?
        #not yet checked
      elsif link.internal?
        add_issue "linking to internal hash ##{link.hash} that does not exist" unless hash_check @html, link.hash
      elsif !target_missing
        # Only open the target when it exists; reading a file that was just
        # reported missing would raise.
        target_html = HTML::Proofer.create_nokogiri link.absolute_path
        add_issue "linking to #{link.href}, but #{link.hash} does not exist" unless hash_check target_html, link.hash
      end
    end
  end

  # True when +html+ contains an element whose id or name equals +href_hash+.
  def hash_check(html, href_hash)
    html.xpath("//*[@id='#{href_hash}']", "//*[@name='#{href_hash}']").length > 0
  end

end
Empty file.
4 changes: 2 additions & 2 deletions spec/html/proofer/fixtures/brokenLinkWithNumber.html
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

<body>

<p>Blah blah blah. <a name="25-method-not-allowed" class="anchor" href="#25-method-not-allowed"><span class="mini-icon mini-icon-link"></span></a>25 Method not allowed</h4></p>
<p>Blah blah blah. <a class="anchor" href="#25-method-not-allowed"><span class="mini-icon mini-icon-link"></span></a>25 Method not allowed</h4></p>
</body>

</html>
</html>
Empty file.
2 changes: 1 addition & 1 deletion spec/html/proofer/fixtures/index.html
Original file line number Diff line number Diff line change
@@ -1 +1 @@
root.
<h1 id="anchor">root.</a>
9 changes: 0 additions & 9 deletions spec/html/proofer/fixtures/missingImageDirPrefix.html

This file was deleted.

Empty file.
4 changes: 4 additions & 0 deletions spec/html/proofer/fixtures/relativeLinks.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<a href="/index.html">Relative to root</a>
<a href="index.html">Relative to self</a>
<a href="folder/">Folder relative to self</a>
<a href="index.html#anchor">Anchor relative to self</a>
Loading