Skip to content

Commit

Permalink
Refactor internal link paths resolution / existence check
Browse files Browse the repository at this point in the history
* We now construct and maintain a hash of resolved paths, so that the OS is queried only once per path when checking whether any of its alternative resolutions exists, including the one with the assumed extension and the directory index file.
  • Loading branch information
riccardoporreca committed Mar 14, 2023
1 parent e0e0170 commit d52d828
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 23 deletions.
38 changes: 25 additions & 13 deletions lib/html_proofer/attribute/url.rb
Original file line number Diff line number Diff line change
Expand Up @@ -118,22 +118,42 @@ def query_values
# Reports whether the target of this URL exists.
#
# Inline `data:image` URLs (see `base64?`) are always considered present.
# Every other URL exists exactly when `resolved_path` yields a non-nil path,
# so the answer always agrees with the path resolution and is not taken from
# a separate existence cache that could hold a contradictory value.
#
# @return [Boolean]
def exists?
  return true if base64?

  !resolved_path.nil?
end

# Resolves `absolute_path` to the path that actually exists on disk.
#
# Candidates are tried in this order:
#   1. the path with the `:assume_extension` option appended, if it is a file;
#   2. for a directory that `unslashed_directory?` does not reject, the
#      `:directory_index_file` inside it (nil when that index is not a file);
#   3. the path itself, if it exists as a file or directory.
#
# The outcome, including a nil "nothing found" outcome, is stored in
# `@runner.resolved_paths` keyed by the unresolved absolute path, so the
# filesystem is consulted only on the first lookup of each path.
#
# @return [String, nil] the resolved path, or nil when nothing matches
def resolved_path
  path_to_resolve = absolute_path
  cache = @runner.resolved_paths

  # `key?` rather than a truthiness test: a cached nil is a valid answer.
  return cache[path_to_resolve] if cache.key?(path_to_resolve)

  assumed_extension = @runner.options[:assume_extension]
  path_with_extension = "#{path_to_resolve}#{assumed_extension}"

  resolved =
    if assumed_extension && File.file?(path_with_extension)
      # extensionless URLs; existence checked implicitly by File.file?
      path_with_extension
    elsif File.directory?(path_to_resolve) && !unslashed_directory?(path_to_resolve)
      # implicit index support
      path_with_index = File.join(path_to_resolve, @runner.options[:directory_index_file])
      path_with_index if File.file?(path_with_index)
    elsif File.exist?(path_to_resolve)
      # explicit file or directory
      path_to_resolve
    end

  # The assignment's value is the method's return value.
  cache[path_to_resolve] = resolved
end

# Tells whether the raw attribute value is an inline image data URI.
#
# The pattern is anchored with `^`, so it matches `data:image` at the start
# of any line of the attribute. A nil attribute yields false.
#
# @return [Boolean]
def base64?
  inline_image_prefix = /^data:image/
  inline_image_prefix.match?(@raw_attribute)
end

# Absolute filesystem path this URL points at, before any extension or
# directory index resolution.
#
# Uses the result of `resolve_path`, falling back to `@filename` when that
# returns nil, and expands it against the current working directory.
#
# @return [String]
def absolute_path
  path = resolve_path || @filename

  File.expand_path(path, Dir.pwd)
end

def file_path
def resolve_path
return if path.nil? || path.empty?

base = if absolute_path?(path) # path relative to root
Expand All @@ -144,15 +164,7 @@ def file_path
File.dirname(@filename)
end

file = File.join(base, path)

if @runner.options[:assume_extension] && File.file?("#{file}#{@runner.options[:assume_extension]}")
file = "#{file}#{@runner.options[:assume_extension]}"
elsif File.directory?(file) && !unslashed_directory?(file) # implicit index support
file = File.join(file, @runner.options[:directory_index_file])
end

file
File.join(base, path)
end

def unslashed_directory?(file)
Expand Down
4 changes: 2 additions & 2 deletions lib/html_proofer/runner.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ module HTMLProofer
class Runner
include HTMLProofer::Utils

attr_reader :options, :cache, :logger, :internal_urls, :external_urls, :checked_paths, :current_check
attr_reader :options, :cache, :logger, :internal_urls, :external_urls, :resolved_paths, :current_check
attr_accessor :current_filename, :current_source, :reporter

URL_TYPES = [:external, :internal].freeze
Expand All @@ -26,7 +26,7 @@ def initialize(src, opts = {})

@before_request = []

@checked_paths = {}
@resolved_paths = {}

@current_check = nil
@current_source = nil
Expand Down
10 changes: 2 additions & 8 deletions lib/html_proofer/url_validator/internal.rb
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,7 @@ def run_internal_link_checker(links)
matched_files.each do |metadata|
url = HTMLProofer::Attribute::Url.new(@runner, link, base_url: metadata[:base_url], source: metadata[:source], filename: metadata[:filename])

target_file_path = url.absolute_path
unless file_exists?(target_file_path)
unless url.exists?
@failed_checks << Failure.new(
metadata[:filename],
"Links > Internal",
Expand All @@ -48,6 +47,7 @@ def run_internal_link_checker(links)
hash_exists = hash_exists_for_url?(url)
if hash_exists.nil?
# the hash needs to be checked in the target file, we collect the url and metadata
target_file_path = url.resolved_path
unless file_paths_hashes_to_check.key?(target_file_path)
file_paths_hashes_to_check[target_file_path] = {}
end
Expand Down Expand Up @@ -106,12 +106,6 @@ def run_internal_link_checker(links)
@failed_checks
end

# Memoised existence check for an absolute path.
#
# The first lookup of a path asks the filesystem via `File.exist?` and
# records the answer in `@runner.checked_paths`; later lookups of the same
# path return the recorded answer without touching the filesystem again.
#
# @param absolute_path [String] path to check
# @return [Boolean] the (possibly cached) result of `File.exist?`
private def file_exists?(absolute_path)
  known = @runner.checked_paths

  known.fetch(absolute_path) do
    known[absolute_path] = File.exist?(absolute_path)
  end
end

# verify the hash based on the URL alone, w/o looking at the target file
# => returns nil if the hash could not be verified
private def hash_exists_for_url?(url)
Expand Down

0 comments on commit d52d828

Please sign in to comment.