From 5ed603327f11b656798864593fa1140021887c30 Mon Sep 17 00:00:00 2001 From: Simon Potter Date: Thu, 28 Jul 2016 20:49:40 +1200 Subject: [PATCH] Improve support for nth-*(an+b) selectors. Ported from @redapple's implementation. --- R/xpath.R | 211 +++++++++++++++++------------- tests/testthat/test-main.R | 8 +- tests/testthat/test-select.R | 3 +- tests/testthat/test-translation.R | 46 ++++--- 4 files changed, 156 insertions(+), 112 deletions(-) diff --git a/R/xpath.R b/R/xpath.R index 82dde47..9082d2c 100644 --- a/R/xpath.R +++ b/R/xpath.R @@ -236,7 +236,7 @@ GenericTranslator <- setRefClass("GenericTranslator", xpath }, xpath_descendant_combinator = function(left, right) { - left$join("/descendant-or-self::*/", right) + left$join("/descendant::", right) }, xpath_child_combinator = function(left, right) { left$join("/", right) @@ -254,97 +254,140 @@ GenericTranslator <- setRefClass("GenericTranslator", ab <- parse_series(fn$arguments) a <- ab[1] b <- ab[2] - if (add_name_test) { - xpath$add_name_test() - } - xpath$add_star_prefix() - # non-last - # -------- - # position() = an+b - # -> position() - b = an + + # From https://www.w3.org/TR/css3-selectors/#structural-pseudos: # - # if a < 0: - # position() - b < 0 - # -> position() < b + # :nth-child(an+b) + # an+b-1 siblings before + # + # :nth-last-child(an+b) + # an+b-1 siblings after + # + # :nth-of-type(an+b) + # an+b-1 siblings with the same expanded element name before + # + # :nth-last-of-type(an+b) + # an+b-1 siblings with the same expanded element name after + # + # So, + # for :nth-child and :nth-of-type + # + # count(preceding-sibling::) = an+b-1 + # + # for :nth-last-child and :nth-last-of-type + # + # count(following-sibling::) = an+b-1 # - # last - # ---- - # last() - position() = an+b -1 - # -> last() - position() - b +1 = an + # therefore, + # count(...) - (b-1) ≡ 0 (mod a) + # + # if a == 0: + # ~~~~~~~~~~ + # count(...) 
= b-1 # # if a < 0: - # last() - position() - b +1 < 0 - # -> position() > last() - b +1 + # ~~~~~~~~~ + # count(...) - b +1 <= 0 + # -> count(...) <= b-1 # - if (b > 0) { - b_neg <- as.character(-b) + # if a > 0: + # ~~~~~~~~~ + # count(...) - b +1 >= 0 + # -> count(...) >= b-1 + + # work with b-1 instead + b_min_1 <- b - 1 + + # early-exit condition 1: + # ~~~~~~~~~~~~~~~~~~~~~~~ + # for a == 1, nth-*(an+b) means n+b-1 siblings before/after, + # and since n %in% {0, 1, 2, ...}, if b-1<=0, + # there is always an "n" matching any number of siblings (maybe none) + if (a == 1 && b_min_1 <=0) { + return(xpath) + } + # early-exit condition 2: + # ~~~~~~~~~~~~~~~~~~~~~~~ + # an+b-1 siblings with a<0 and (b-1)<0 is not possible + if (a < 0 && b_min_1 < 0) { + xpath$add_condition("0") + return(xpath) + } + + # `add_name_test` boolean is inverted and somewhat counter-intuitive: + # + # nth_of_type() calls nth_child(add_name_test=False) + if (add_name_test) { + nodetest <- "*" } else { - b_neg <- sprintf("+%s", -b) + nodetest <- sprintf("%s", xpath$element) } + + # count siblings before or after the element + if (!last) { + siblings_count <- sprintf("count(preceding-sibling::%s)", nodetest) + } else { + siblings_count <- sprintf("count(following-sibling::%s)", nodetest) + } + + # special case of fixed position: nth-*(0n+b) + # if a == 0: + # ~~~~~~~~~~ + # count(***-sibling::***) = b-1 if (a == 0) { - if (last) { - # http://www.w3.org/TR/selectors/#nth-last-child-pseudo - # The :nth-last-child(an+b) pseudo-class notation represents - # an element that has an+b-1 siblings after it in the document tree - # - # last() - position() = an+b-1 - # -> position() = last() -b +1 (for a==0) - # - if (b == 1) { - b <- "last()" - } else { - b <- sprintf("last() %s +1", b_neg) - } - } - xpath$add_condition(sprintf("position() = %s", b)) + xpath$add_condition(sprintf("%s = %s", siblings_count, b_min_1)) return(xpath) } - if (a != 1) { - if (last) { - if (b == 0) { - expr <- 
sprintf("(last() - position() +1) mod %s = 0", a) - } else { - expr <- sprintf("(last() - position() %s +1) mod %s = 0", - b_neg, a) - } - } else { - if (b == 0) { - expr <- sprintf("position() mod %s = 0", a) - } else { - expr <- sprintf("(position() %s) mod %s = 0", b_neg, a) - } + + expr <- character(0) + + if (a > 0) { + # siblings count, an+b-1, is always >= 0, + # so if a>0, and (b-1)<=0, an "n" exists to satisfy this, + # therefore, the predicate is only interesting if (b-1)>0 + if (b_min_1 > 0) { + expr <- c(expr, sprintf("%s >= %s", siblings_count, b_min_1)) } } else { - expr <- character(0) + # if a<0, and (b-1)<0, no "n" satisfies this, + # this is tested above as an early-exit condition + # otherwise, + expr <- c(expr, sprintf("%s <= %s", siblings_count, b_min_1)) } - if (last) { - tmpop <- if (a > 0) "<=" else ">=" - if (b == 0) { - expr <- c(expr, sprintf("(position() %s last() +1)", tmpop)) - } else { - expr <- c(expr, sprintf("position() %s (last() %s +1)", tmpop, b_neg)) - } - } else { - tmpop <- if (a > 0) ">=" else "<=" - if (b > 0) { - # position() > 0 so if b < 0, position() > b, always - expr <- c(expr, sprintf("position() %s %s", tmpop, b)) - } else if (b == 0) { - expr <- c(expr, "position()") + + # operations modulo 1 or -1 are simpler, one only needs to verify: + # + # - either: + # count(***-sibling::***) - (b-1) = n = 0, 1, 2, 3, etc., + # i.e. count(***-sibling::***) >= (b-1) + # + # - or: + # count(***-sibling::***) - (b-1) = -n = 0, -1, -2, -3, etc., + # i.e. count(***-sibling::***) <= (b-1) + # as we just did above. + # + if (abs(a) != 1) { + # count(***-sibling::***) - (b-1) ≡ 0 (mod a) + left <- siblings_count + + # apply "modulo a" on 2nd term, -(b-1), + # to simplify things like "(... 
+6) % -3", + # and also make it positive with |a| + b_neg <- (-b_min_1) %% abs(a) + + if (b_neg != 0) { + b_neg <- sprintf("+%s", b_neg) + left <- sprintf("(%s %s)", left, b_neg) } + + expr <- c(expr, sprintf("%s mod %s = 0", left, a)) } - expr <- paste0(expr, collapse = " and ") + if (length(expr)) { + expr <- paste0(expr, collapse = " and ") xpath$add_condition(expr) } xpath - # FIXME: handle an+b, odd, even - # an+b means every-a, plus b, e.g., 2n+1 means odd - # 0n+b means b - # n+0 means a=1, i.e., all elements - # an means every a elements, i.e., 2n means even - # -n means -1n - # -1n+6 means elements 6 and previous }, xpath_nth_last_child_function = function(xpath, fn) { xpath_nth_child_function(xpath, fn, last = TRUE) @@ -357,7 +400,7 @@ GenericTranslator <- setRefClass("GenericTranslator", }, xpath_nth_last_of_type_function = function(xpath, fn) { if (xpath$element == "*") { - stop("*:nth-of-type() is not implemented") + stop("*:nth-last-of-type() is not implemented") } xpath_nth_child_function(xpath, fn, last = TRUE, add_name_test = FALSE) }, @@ -384,44 +427,36 @@ GenericTranslator <- setRefClass("GenericTranslator", xpath }, xpath_first_child_pseudo = function(xpath) { - xpath$add_star_prefix() - xpath$add_name_test() - xpath$add_condition("position() = 1") + xpath$add_condition("count(preceding-sibling::*) = 0") xpath }, xpath_last_child_pseudo = function(xpath) { - xpath$add_star_prefix() - xpath$add_name_test() - xpath$add_condition("position() = last()") + xpath$add_condition("count(following-sibling::*) = 0") xpath }, xpath_first_of_type_pseudo = function(xpath) { if (xpath$element == "*") { stop("*:first-of-type is not implemented") } - xpath$add_star_prefix() - xpath$add_condition("position() = 1") + xpath$add_condition(sprintf("count(preceding-sibling::%s) = 0", xpath$element)) xpath }, xpath_last_of_type_pseudo = function(xpath) { if (xpath$element == "*") { stop("*:last-of-type is not implemented") } - xpath$add_star_prefix() - 
xpath$add_condition("position() = last()") + xpath$add_condition(sprintf("count(following-sibling::%s) = 0", xpath$element)) xpath }, xpath_only_child_pseudo = function(xpath) { - xpath$add_name_test() - xpath$add_star_prefix() - xpath$add_condition('last() = 1') + xpath$add_condition("count(parent::*/child::*) = 1") xpath }, xpath_only_of_type_pseudo = function(xpath) { if (xpath$element == "*") { stop("*:only-of-type is not implemented") } - xpath$add_condition("last() = 1") + xpath$add_condition(sprintf("count(parent::*/child::%s) = 1", xpath$element)) xpath }, xpath_empty_pseudo = function(xpath) { diff --git a/tests/testthat/test-main.R b/tests/testthat/test-main.R index cdc1f7e..403b5a2 100644 --- a/tests/testthat/test-main.R +++ b/tests/testthat/test-main.R @@ -3,10 +3,10 @@ context("main") # We know that the results are correct via other tests, just check that # this produces the correct results with respect to its arguments test_that("css_to_xpath vectorises arguments", { - expect_that(css_to_xpath("a b"), equals("descendant-or-self::a/descendant-or-self::*/b")) - expect_that(css_to_xpath("a b", prefix = ""), equals("a/descendant-or-self::*/b")) - expect_that(css_to_xpath("a b"), equals("descendant-or-self::a/descendant-or-self::*/b", "a/descendant-or-self::*/b")) + expect_that(css_to_xpath("a b"), equals("descendant-or-self::a/descendant::b")) + expect_that(css_to_xpath("a b", prefix = ""), equals("a/descendant::b")) + expect_that(css_to_xpath("a b", prefix = c("descendant-or-self::", "")), equals(c("descendant-or-self::a/descendant::b", "a/descendant::b"))) expect_that(css_to_xpath("a:checked", prefix = "", translator = c("generic", "html")), equals(c("a[0]", "a[(@selected and name(.) = 'option') or (@checked and (name(.) = 'input' or name(.) 
= 'command')and (@type = 'checkbox' or @type = 'radio'))]"))) - expect_that(css_to_xpath(c("a b", "b c"), prefix = ""), equals(c("a/descendant-or-self::*/b", "b/descendant-or-self::*/c"))) + expect_that(css_to_xpath(c("a b", "b c"), prefix = ""), equals(c("a/descendant::b", "b/descendant::c"))) }) diff --git a/tests/testthat/test-select.R b/tests/testthat/test-select.R index 140a58d..e6efdc5 100644 --- a/tests/testthat/test-select.R +++ b/tests/testthat/test-select.R @@ -96,12 +96,13 @@ test_that("selection works correctly on a large barrage of tests", { # ... :lang() is not. expect_that(pcss(':lang("EN")', '*:lang(en-US)', html_only=TRUE), equals(c('second-li', 'li-div'))) expect_that(pcss(':lang("e")', html_only=TRUE), equals(NULL)) + expect_that(pcss('li:nth-child(-n)'), equals(NULL)) + expect_that(pcss('li:nth-child(n)'), equals(c('first-li', 'second-li', 'third-li', 'fourth-li', 'fifth-li', 'sixth-li', 'seventh-li'))) expect_that(pcss('li:nth-child(3)'), equals('third-li')) expect_that(pcss('li:nth-child(10)'), equals(NULL)) expect_that(pcss('li:nth-child(2n)', c('li:nth-child(even)', 'li:nth-child(2n+0)')), equals(c('second-li', 'fourth-li', 'sixth-li'))) expect_that(pcss('li:nth-child(+2n+1)', 'li:nth-child(odd)'), equals(c('first-li', 'third-li', 'fifth-li', 'seventh-li'))) expect_that(pcss('li:nth-child(2n+4)'), equals(c('fourth-li', 'sixth-li'))) - ## FIXME: I'm not 100% sure this is right: expect_that(pcss('li:nth-child(3n+1)'), equals(c('first-li', 'fourth-li', 'seventh-li'))) expect_that(pcss('li:nth-child(-n+3)'), equals(c('first-li', 'second-li', 'third-li'))) expect_that(pcss('li:nth-child(-2n+4)'), equals(c('second-li', 'fourth-li'))) diff --git a/tests/testthat/test-translation.R b/tests/testthat/test-translation.R index 139468a..383ad89 100644 --- a/tests/testthat/test-translation.R +++ b/tests/testthat/test-translation.R @@ -24,37 +24,43 @@ test_that("translation from parsed objects to XPath works", { expect_that(xpath('e[hreflang|="en"]'), 
equals("e[@hreflang and (@hreflang = 'en' or starts-with(@hreflang, 'en-'))]")) expect_that(xpath('e:nth-child(1)'), - equals("*/*[name() = 'e' and (position() = 1)]")) + equals("e[count(preceding-sibling::*) = 0]")) expect_that(xpath('e:nth-child(3n+2)'), - equals("*/*[name() = 'e' and ((position() -2) mod 3 = 0 and position() >= 2)]")) + equals("e[count(preceding-sibling::*) >= 1 and (count(preceding-sibling::*) +2) mod 3 = 0]")) expect_that(xpath('e:nth-child(3n-2)'), - equals("*/*[name() = 'e' and ((position() +2) mod 3 = 0)]")) + equals("e[count(preceding-sibling::*) mod 3 = 0]")) expect_that(xpath('e:nth-child(-n+6)'), - equals("*/*[name() = 'e' and ((position() -6) mod -1 = 0 and position() <= 6)]")) + equals("e[count(preceding-sibling::*) <= 5]")) expect_that(xpath('e:nth-last-child(1)'), - equals("*/*[name() = 'e' and (position() = last())]")) + equals("e[count(following-sibling::*) = 0]")) expect_that(xpath('e:nth-last-child(2n)'), - equals("*/*[name() = 'e' and ((last() - position() +1) mod 2 = 0 and (position() <= last() +1))]")) + equals("e[(count(following-sibling::*) +1) mod 2 = 0]")) + expect_that(xpath('e:nth-last-child(2n+1)'), + equals("e[count(following-sibling::*) mod 2 = 0]")) expect_that(xpath('e:nth-last-child(2n+2)'), - equals("*/*[name() = 'e' and ((last() - position() -2 +1) mod 2 = 0 and position() <= (last() -2 +1))]")) + equals("e[count(following-sibling::*) >= 1 and (count(following-sibling::*) +1) mod 2 = 0]")) + expect_that(xpath('e:nth-last-child(3n+1)'), + equals("e[count(following-sibling::*) mod 3 = 0]")) + expect_that(xpath('e:nth-last-child(-n+2)'), + equals("e[count(following-sibling::*) <= 1]")) expect_that(xpath('e:nth-of-type(1)'), - equals("*/e[position() = 1]")) + equals("e[count(preceding-sibling::e) = 0]")) expect_that(xpath('e:nth-last-of-type(1)'), - equals("*/e[position() = last()]")) + equals("e[count(following-sibling::e) = 0]")) expect_that(xpath('div e:nth-last-of-type(1) .aclass'), - 
equals("div/descendant-or-self::*/e[position() = last()]/descendant-or-self::*/*[@class and contains(concat(' ', normalize-space(@class), ' '), ' aclass ')]")) + equals("div/descendant::e[count(following-sibling::e) = 0]/descendant::*[@class and contains(concat(' ', normalize-space(@class), ' '), ' aclass ')]")) expect_that(xpath('e:first-child'), - equals("*/*[name() = 'e' and (position() = 1)]")) + equals("e[count(preceding-sibling::*) = 0]")) expect_that(xpath('e:last-child'), - equals("*/*[name() = 'e' and (position() = last())]")) + equals("e[count(following-sibling::*) = 0]")) expect_that(xpath('e:first-of-type'), - equals("*/e[position() = 1]")) + equals("e[count(preceding-sibling::e) = 0]")) expect_that(xpath('e:last-of-type'), - equals("*/e[position() = last()]")) + equals("e[count(following-sibling::e) = 0]")) expect_that(xpath('e:only-child'), - equals("*/*[name() = 'e' and (last() = 1)]")) + equals("e[count(parent::*/child::*) = 1]")) expect_that(xpath('e:only-of-type'), - equals("e[last() = 1]")) + equals("e[count(parent::*/child::e) = 1]")) expect_that(xpath('e:empty'), equals("e[not(*) and not(string-length())]")) expect_that(xpath('e:EmPTY'), @@ -72,19 +78,21 @@ test_that("translation from parsed objects to XPath works", { expect_that(xpath('e#myid'), equals("e[@id = 'myid']")) expect_that(xpath('e:not(:nth-child(odd))'), - equals("e[not((position() -1) mod 2 = 0 and position() >= 1)]")) + equals("e[not(count(preceding-sibling::*) mod 2 = 0)]")) expect_that(xpath('e:nOT(*)'), equals("e[0]")) # never matches expect_that(xpath('e f'), - equals("e/descendant-or-self::*/f")) + equals("e/descendant::f")) expect_that(xpath('e > f'), equals("e/f")) expect_that(xpath('e + f'), equals("e/following-sibling::*[name() = 'f' and (position() = 1)]")) expect_that(xpath('e ~ f'), equals("e/following-sibling::f")) + expect_that(xpath('e ~ f:nth-child(3)'), + equals("e/following-sibling::f[count(preceding-sibling::*) = 2]")) expect_that(xpath('div#container p'), - 
equals("div[@id = 'container']/descendant-or-self::*/p")) + equals("div[@id = 'container']/descendant::p")) # Invalid characters in XPath element names