Skip to content

Commit

Permalink
Make linkcheck work on servers without HEAD, and add FTP status checking (#934)
Browse files Browse the repository at this point in the history

* Make linkcheck work on servers without HEAD, and add FTP status checking

* Make linkcheck work on curl as early as 7.18.2

(cherry picked from commit 63a853e)
  • Loading branch information
iamed2 authored and mortenpi committed Feb 5, 2019
1 parent 79036ee commit 621bcb8
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 14 deletions.
34 changes: 20 additions & 14 deletions src/DocChecks.jl
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ function linkcheck(doc::Documents.Document)
return nothing
end

function linkcheck(link::Markdown.Link, doc::Documents.Document)
function linkcheck(link::Markdown.Link, doc::Documents.Document; method::Symbol=:HEAD)

# first, make sure we're not supposed to ignore this link
for r in doc.user.linkcheck_ignore
Expand All @@ -180,7 +180,8 @@ function linkcheck(link::Markdown.Link, doc::Documents.Document)
end

if !haskey(doc.internal.locallinks, link)
cmd = `curl -sI --proto =http,https,ftp,ftps $(link.url) --max-time 10`
null_file = @static Sys.iswindows() ? "nul" : "/dev/null"
cmd = `curl $(method === :HEAD ? "-sI" : "-s") --proto =http,https,ftp,ftps $(link.url) --max-time 10 -o $null_file --write-out "%{http_code} %{url_effective} %{redirect_url}"`

local result
try
Expand All @@ -191,27 +192,32 @@ function linkcheck(link::Markdown.Link, doc::Documents.Document)
@warn "$cmd failed:" exception = err
return false
end
HTTP_STATUS_REGEX = r"^HTTP/(1.1|2) (\d+) (.+)$"m
FTP_STATUS_REGEX = r"^Last-Modified: (.+)\r\nContent-Length: (\d+)(?:\r\n(.*))?$"s
if occursin(HTTP_STATUS_REGEX, result)
status = parse(Int, match(HTTP_STATUS_REGEX, result).captures[2])
if status < 300
STATUS_REGEX = r"^(\d+) (\w+)://(?:\S+) (\S+)?$"m
matched = match(STATUS_REGEX, result)
if matched !== nothing
status, scheme, location = matched.captures
status = parse(Int, status)
scheme = uppercase(scheme)
protocol = startswith(scheme, "HTTP") ? :HTTP :
startswith(scheme, "FTP") ? :FTP : :UNKNOWN

if (protocol === :HTTP && status < 300) ||
(protocol === :FTP && (200 <= status < 300 || status == 350))
@debug "linkcheck '$(link.url)' status: $(status)."
elseif status < 400
LOCATION_REGEX = r"^Location: (.+)$"m
if occursin(LOCATION_REGEX, result)
location = strip(match(LOCATION_REGEX, result).captures[1])
elseif protocol === :HTTP && status < 400
if location !== nothing
@warn "linkcheck '$(link.url)' status: $(status), redirects to $(location)."
else
@warn "linkcheck '$(link.url)' status: $(status)."
end
elseif protocol === :HTTP && status == 405 && method === :HEAD
# when a server doesn't support HEAD requests, fallback to GET
@debug "linkcheck '$(link.url)' status: $(status), retrying without `-I`"
return linkcheck(link, doc; method=:GET)
else
push!(doc.internal.errors, :linkcheck)
@error "linkcheck '$(link.url)' status: $(status)."
end
elseif occursin(FTP_STATUS_REGEX, result)
# this regex is matched iff success
@debug "linkcheck '$(link.url)': FTP success"
else
push!(doc.internal.errors, :linkcheck)
@warn "invalid result returned by $cmd:" result
Expand Down
1 change: 1 addition & 0 deletions test/docchecks.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ using Documenter.Documents
[FTP success](ftp://ftp.iana.org/tz/data/etcetera)
[FTP (no proto) success](ftp.iana.org/tz/data/etcetera)
[Redirect success](google.com)
[HEAD fail GET success](https://codecov.io/gh/invenia/LibPQ.jl)
"""

Documents.walk(Dict{Symbol, Any}(), src) do block
Expand Down

0 comments on commit 621bcb8

Please sign in to comment.