Skip to content

Commit

Permalink
do not transform mailto links
Browse files: browse the repository at this point in the history
This will fix #538
  • Loading branch information
zkamvar committed Nov 9, 2023
1 parent 132a935 commit 99d420c
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 7 deletions.
9 changes: 6 additions & 3 deletions R/utils-xml.R
Original file line number Diff line number Diff line change
Expand Up @@ -144,9 +144,12 @@ use_instructor <- function(nodes = NULL) {
if (length(nodes) == 0) return(nodes)
copy <- xml2::read_html(as.character(nodes))
# find all local links and transform non-html and nested links ---------
lnk <- xml2::xml_find_all(copy,
".//a[@href][not(contains(@href, '://')) and not(starts-with(@href, '#'))]"
)
no_external <- "not(contains(@href, '://'))"
no_anchors <- "not(starts-with(@href, '#'))"
no_mail <- "not(starts-with(@href, 'mailto:'))"
predicate <- paste(c(no_external, no_anchors, no_mail), collapse = " and ")
XPath <- sprintf(".//a[@href][%s]", predicate)
lnk <- xml2::xml_find_all(copy, XPath)
lnk_hrefs <- xml2::xml_attr(lnk, "href")
lnk_paths <- xml2::url_parse(lnk_hrefs)$path
# links without HTML extension
Expand Down
6 changes: 4 additions & 2 deletions tests/testthat/_snaps/utils-xml.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
Code
xml2::xml_find_all(html_test, ".//a[@href]")
Output
{xml_nodeset (10)}
{xml_nodeset (11)}
[1] <a href="index.html">a</a>
[2] <a href="./index.html">b</a>
[3] <a href="fig/thing.png">c</a>
Expand All @@ -14,13 +14,14 @@
[8] <a href="#what-the">h</a>
[9] <a href="other-page.html#section">i</a>
[10] <a href="other-page">j</a>
[11] <a href="mailto:[email protected]?subject='no'">k</a>

---

Code
xml2::xml_find_all(res, ".//a[@href]")
Output
{xml_nodeset (10)}
{xml_nodeset (11)}
[1] <a href="index.html">a</a>
[2] <a href="./index.html">b</a>
[3] <a href="../fig/thing.png">c</a>
Expand All @@ -31,4 +32,5 @@
[8] <a href="#what-the">h</a>
[9] <a href="other-page.html#section">i</a>
[10] <a href="other-page">j</a>
[11] <a href="mailto:[email protected]?subject='no'">k</a>

5 changes: 3 additions & 2 deletions tests/testthat/test-utils-xml.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,14 @@ test_that("paths in instructor view that are nested or not HTML get diverted", {
"[g](files/confirmation.html)", # asset
"[h](#what-the)",
"[i](other-page.html#section)",
"[j](other-page)"
"[j](other-page)",
"[k](mailto:[email protected]?subject='no')"
)))
res <- xml2::read_html(use_instructor(html_test))
# refs are transformed according to our rules
refs <- xml2::xml_text(xml2::xml_find_all(res, ".//@href"))
expect_equal(startsWith(refs, "../"),
c(FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, FALSE))
c(FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE))
expect_snapshot(xml2::xml_find_all(html_test, ".//a[@href]"))
expect_snapshot(xml2::xml_find_all(res, ".//a[@href]"))
})
Expand Down

0 comments on commit 99d420c

Please sign in to comment.