Skip to content

Commit

Permalink
Improve support for nth-*(an+b) selectors.
Browse files Browse the repository at this point in the history
Ported from @redapple's implementation.
  • Loading branch information
sjp committed Jul 28, 2016
1 parent 9653ba0 commit 5ed6033
Show file tree
Hide file tree
Showing 4 changed files with 156 additions and 112 deletions.
211 changes: 123 additions & 88 deletions R/xpath.R
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ GenericTranslator <- setRefClass("GenericTranslator",
xpath
},
xpath_descendant_combinator = function(left, right) {
left$join("/descendant-or-self::*/", right)
left$join("/descendant::", right)
},
xpath_child_combinator = function(left, right) {
left$join("/", right)
Expand All @@ -254,97 +254,140 @@ GenericTranslator <- setRefClass("GenericTranslator",
ab <- parse_series(fn$arguments)
a <- ab[1]
b <- ab[2]
if (add_name_test) {
xpath$add_name_test()
}
xpath$add_star_prefix()
# non-last
# --------
# position() = an+b
# -> position() - b = an

# From https://www.w3.org/TR/css3-selectors/#structural-pseudos:
#
# if a < 0:
# position() - b < 0
# -> position() < b
# :nth-child(an+b)
# an+b-1 siblings before
#
# :nth-last-child(an+b)
# an+b-1 siblings after
#
# :nth-of-type(an+b)
# an+b-1 siblings with the same expanded element name before
#
# :nth-last-of-type(an+b)
# an+b-1 siblings with the same expanded element name after
#
# So,
# for :nth-child and :nth-of-type
#
# count(preceding-sibling::<nodetest>) = an+b-1
#
# for :nth-last-child and :nth-last-of-type
#
# count(following-sibling::<nodetest>) = an+b-1
#
# last
# ----
# last() - position() = an+b -1
# -> last() - position() - b +1 = an
# therefore,
# count(...) - (b-1) ≡ 0 (mod a)
#
# if a == 0:
# ~~~~~~~~~~
# count(...) = b-1
#
# if a < 0:
# last() - position() - b +1 < 0
# -> position() > last() - b +1
# ~~~~~~~~~
# count(...) - b +1 <= 0
# -> count(...) <= b-1
#
if (b > 0) {
b_neg <- as.character(-b)
# if a > 0:
# ~~~~~~~~~
# count(...) - b +1 >= 0
# -> count(...) >= b-1

# work with b-1 instead
b_min_1 <- b - 1

# early-exit condition 1:
# ~~~~~~~~~~~~~~~~~~~~~~~
# for a == 1, nth-*(an+b) means n+b-1 siblings before/after,
# and since n %in% {0, 1, 2, ...}, if b-1<=0,
# there is always an "n" matching any number of siblings (maybe none)
if (a == 1 && b_min_1 <=0) {
return(xpath)
}
# early-exit condition 2:
# ~~~~~~~~~~~~~~~~~~~~~~~
# an+b-1 siblings with a<0 and (b-1)<0 is not possible
if (a < 0 && b_min_1 < 0) {
xpath$add_condition("0")
return(xpath)
}

# `add_name_test` boolean is inverted and somewhat counter-intuitive:
#
# nth_of_type() calls nth_child(add_name_test=False)
if (add_name_test) {
nodetest <- "*"
} else {
b_neg <- sprintf("+%s", -b)
nodetest <- sprintf("%s", xpath$element)
}

# count siblings before or after the element
if (!last) {
siblings_count <- sprintf("count(preceding-sibling::%s)", nodetest)
} else {
siblings_count <- sprintf("count(following-sibling::%s)", nodetest)
}

# special case of fixed position: nth-*(0n+b)
# if a == 0:
# ~~~~~~~~~~
# count(***-sibling::***) = b-1
if (a == 0) {
if (last) {
# http://www.w3.org/TR/selectors/#nth-last-child-pseudo
# The :nth-last-child(an+b) pseudo-class notation represents
# an element that has an+b-1 siblings after it in the document tree
#
# last() - position() = an+b-1
# -> position() = last() -b +1 (for a==0)
#
if (b == 1) {
b <- "last()"
} else {
b <- sprintf("last() %s +1", b_neg)
}
}
xpath$add_condition(sprintf("position() = %s", b))
xpath$add_condition(sprintf("%s = %s", siblings_count, b_min_1))
return(xpath)
}
if (a != 1) {
if (last) {
if (b == 0) {
expr <- sprintf("(last() - position() +1) mod %s = 0", a)
} else {
expr <- sprintf("(last() - position() %s +1) mod %s = 0",
b_neg, a)
}
} else {
if (b == 0) {
expr <- sprintf("position() mod %s = 0", a)
} else {
expr <- sprintf("(position() %s) mod %s = 0", b_neg, a)
}

expr <- character(0)

if (a > 0) {
# siblings count, an+b-1, is always >= 0,
# so if a>0, and (b-1)<=0, an "n" exists to satisfy this,
# therefore, the predicate is only interesting if (b-1)>0
if (b_min_1 > 0) {
expr <- c(expr, sprintf("%s >= %s", siblings_count, b_min_1))
}
} else {
expr <- character(0)
# if a<0, and (b-1)<0, no "n" satisfies this,
# this is tested above as an early exit condition
# otherwise,
expr <- c(expr, sprintf("%s <= %s", siblings_count, b_min_1))
}
if (last) {
tmpop <- if (a > 0) "<=" else ">="
if (b == 0) {
expr <- c(expr, sprintf("(position() %s last() +1)", tmpop))
} else {
expr <- c(expr, sprintf("position() %s (last() %s +1)", tmpop, b_neg))
}
} else {
tmpop <- if (a > 0) ">=" else "<="
if (b > 0) {
# position() > 0 so if b < 0, position() > b, always
expr <- c(expr, sprintf("position() %s %s", tmpop, b))
} else if (b == 0) {
expr <- c(expr, "position()")

# operations modulo 1 or -1 are simpler, one only needs to verify:
#
# - either:
# count(***-sibling::***) - (b-1) = n = 0, 1, 2, 3, etc.,
# i.e. count(***-sibling::***) >= (b-1)
#
# - or:
# count(***-sibling::***) - (b-1) = -n = 0, -1, -2, -3, etc.,
# i.e. count(***-sibling::***) <= (b-1)
# which we just did above.
#
if (abs(a) != 1) {
# count(***-sibling::***) - (b-1) ≡ 0 (mod a)
left <- siblings_count

# apply "modulo a" on 2nd term, -(b-1),
# to simplify things like "(... +6) % -3",
# and also make it positive with |a|
b_neg <- (-b_min_1) %% abs(a)

if (b_neg != 0) {
b_neg <- sprintf("+%s", b_neg)
left <- sprintf("(%s %s)", left, b_neg)
}

expr <- c(expr, sprintf("%s mod %s = 0", left, a))
}
expr <- paste0(expr, collapse = " and ")

if (length(expr)) {
expr <- paste0(expr, collapse = " and ")
xpath$add_condition(expr)
}
xpath
# FIXME: handle an+b, odd, even
# an+b means every-a, plus b, e.g., 2n+1 means odd
# 0n+b means b
# n+0 means a=1, i.e., all elements
# an means every a elements, i.e., 2n means even
# -n means -1n
# -1n+6 means elements 6 and previous
},
xpath_nth_last_child_function = function(xpath, fn) {
xpath_nth_child_function(xpath, fn, last = TRUE)
Expand All @@ -357,7 +400,7 @@ GenericTranslator <- setRefClass("GenericTranslator",
},
xpath_nth_last_of_type_function = function(xpath, fn) {
if (xpath$element == "*") {
stop("*:nth-of-type() is not implemented")
stop("*:nth-last-of-type() is not implemented")
}
xpath_nth_child_function(xpath, fn, last = TRUE, add_name_test = FALSE)
},
Expand All @@ -384,44 +427,36 @@ GenericTranslator <- setRefClass("GenericTranslator",
xpath
},
xpath_first_child_pseudo = function(xpath) {
xpath$add_star_prefix()
xpath$add_name_test()
xpath$add_condition("position() = 1")
xpath$add_condition("count(preceding-sibling::*) = 0")
xpath
},
xpath_last_child_pseudo = function(xpath) {
xpath$add_star_prefix()
xpath$add_name_test()
xpath$add_condition("position() = last()")
xpath$add_condition("count(following-sibling::*) = 0")
xpath
},
xpath_first_of_type_pseudo = function(xpath) {
if (xpath$element == "*") {
stop("*:first-of-type is not implemented")
}
xpath$add_star_prefix()
xpath$add_condition("position() = 1")
xpath$add_condition(sprintf("count(preceding-sibling::%s) = 0", xpath$element))
xpath
},
xpath_last_of_type_pseudo = function(xpath) {
if (xpath$element == "*") {
stop("*:last-of-type is not implemented")
}
xpath$add_star_prefix()
xpath$add_condition("position() = last()")
xpath$add_condition(sprintf("count(following-sibling::%s) = 0", xpath$element))
xpath
},
xpath_only_child_pseudo = function(xpath) {
xpath$add_name_test()
xpath$add_star_prefix()
xpath$add_condition('last() = 1')
xpath$add_condition("count(parent::*/child::*) = 1")
xpath
},
xpath_only_of_type_pseudo = function(xpath) {
if (xpath$element == "*") {
stop("*:only-of-type is not implemented")
}
xpath$add_condition("last() = 1")
xpath$add_condition(sprintf("count(parent::*/child::%s) = 1", xpath$element))
xpath
},
xpath_empty_pseudo = function(xpath) {
Expand Down
8 changes: 4 additions & 4 deletions tests/testthat/test-main.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@ context("main")
# We know that the results are correct via other tests, just check that
# this produces the correct results with respect to its arguments
test_that("css_to_xpath vectorises arguments", {
expect_that(css_to_xpath("a b"), equals("descendant-or-self::a/descendant-or-self::*/b"))
expect_that(css_to_xpath("a b", prefix = ""), equals("a/descendant-or-self::*/b"))
expect_that(css_to_xpath("a b"), equals("descendant-or-self::a/descendant-or-self::*/b", "a/descendant-or-self::*/b"))
expect_that(css_to_xpath("a b"), equals("descendant-or-self::a/descendant::b"))
expect_that(css_to_xpath("a b", prefix = ""), equals("a/descendant::b"))
expect_that(css_to_xpath("a b", prefix = c("descendant-or-self::", "")), equals(c("descendant-or-self::a/descendant::b", "a/descendant::b")))
expect_that(css_to_xpath("a:checked", prefix = "", translator = c("generic", "html")),
equals(c("a[0]", "a[(@selected and name(.) = 'option') or (@checked and (name(.) = 'input' or name(.) = 'command')and (@type = 'checkbox' or @type = 'radio'))]")))
expect_that(css_to_xpath(c("a b", "b c"), prefix = ""), equals(c("a/descendant-or-self::*/b", "b/descendant-or-self::*/c")))
expect_that(css_to_xpath(c("a b", "b c"), prefix = ""), equals(c("a/descendant::b", "b/descendant::c")))
})
3 changes: 2 additions & 1 deletion tests/testthat/test-select.R
Original file line number Diff line number Diff line change
Expand Up @@ -96,12 +96,13 @@ test_that("selection works correctly on a large barrage of tests", {
# ... :lang() is not.
expect_that(pcss(':lang("EN")', '*:lang(en-US)', html_only=TRUE), equals(c('second-li', 'li-div')))
expect_that(pcss(':lang("e")', html_only=TRUE), equals(NULL))
expect_that(pcss('li:nth-child(-n)'), equals(NULL))
expect_that(pcss('li:nth-child(n)'), equals(c('first-li', 'second-li', 'third-li', 'fourth-li', 'fifth-li', 'sixth-li', 'seventh-li')))
expect_that(pcss('li:nth-child(3)'), equals('third-li'))
expect_that(pcss('li:nth-child(10)'), equals(NULL))
expect_that(pcss('li:nth-child(2n)', c('li:nth-child(even)', 'li:nth-child(2n+0)')), equals(c('second-li', 'fourth-li', 'sixth-li')))
expect_that(pcss('li:nth-child(+2n+1)', 'li:nth-child(odd)'), equals(c('first-li', 'third-li', 'fifth-li', 'seventh-li')))
expect_that(pcss('li:nth-child(2n+4)'), equals(c('fourth-li', 'sixth-li')))
## FIXME: I'm not 100% sure this is right:
expect_that(pcss('li:nth-child(3n+1)'), equals(c('first-li', 'fourth-li', 'seventh-li')))
expect_that(pcss('li:nth-child(-n+3)'), equals(c('first-li', 'second-li', 'third-li')))
expect_that(pcss('li:nth-child(-2n+4)'), equals(c('second-li', 'fourth-li')))
Expand Down
46 changes: 27 additions & 19 deletions tests/testthat/test-translation.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,37 +24,43 @@ test_that("translation from parsed objects to XPath works", {
expect_that(xpath('e[hreflang|="en"]'),
equals("e[@hreflang and (@hreflang = 'en' or starts-with(@hreflang, 'en-'))]"))
expect_that(xpath('e:nth-child(1)'),
equals("*/*[name() = 'e' and (position() = 1)]"))
equals("e[count(preceding-sibling::*) = 0]"))
expect_that(xpath('e:nth-child(3n+2)'),
equals("*/*[name() = 'e' and ((position() -2) mod 3 = 0 and position() >= 2)]"))
equals("e[count(preceding-sibling::*) >= 1 and (count(preceding-sibling::*) +2) mod 3 = 0]"))
expect_that(xpath('e:nth-child(3n-2)'),
equals("*/*[name() = 'e' and ((position() +2) mod 3 = 0)]"))
equals("e[count(preceding-sibling::*) mod 3 = 0]"))
expect_that(xpath('e:nth-child(-n+6)'),
equals("*/*[name() = 'e' and ((position() -6) mod -1 = 0 and position() <= 6)]"))
equals("e[count(preceding-sibling::*) <= 5]"))
expect_that(xpath('e:nth-last-child(1)'),
equals("*/*[name() = 'e' and (position() = last())]"))
equals("e[count(following-sibling::*) = 0]"))
expect_that(xpath('e:nth-last-child(2n)'),
equals("*/*[name() = 'e' and ((last() - position() +1) mod 2 = 0 and (position() <= last() +1))]"))
equals("e[(count(following-sibling::*) +1) mod 2 = 0]"))
expect_that(xpath('e:nth-last-child(2n+1)'),
equals("e[count(following-sibling::*) mod 2 = 0]"))
expect_that(xpath('e:nth-last-child(2n+2)'),
equals("*/*[name() = 'e' and ((last() - position() -2 +1) mod 2 = 0 and position() <= (last() -2 +1))]"))
equals("e[count(following-sibling::*) >= 1 and (count(following-sibling::*) +1) mod 2 = 0]"))
expect_that(xpath('e:nth-last-child(3n+1)'),
equals("e[count(following-sibling::*) mod 3 = 0]"))
expect_that(xpath('e:nth-last-child(-n+2)'),
equals("e[count(following-sibling::*) <= 1]"))
expect_that(xpath('e:nth-of-type(1)'),
equals("*/e[position() = 1]"))
equals("e[count(preceding-sibling::e) = 0]"))
expect_that(xpath('e:nth-last-of-type(1)'),
equals("*/e[position() = last()]"))
equals("e[count(following-sibling::e) = 0]"))
expect_that(xpath('div e:nth-last-of-type(1) .aclass'),
equals("div/descendant-or-self::*/e[position() = last()]/descendant-or-self::*/*[@class and contains(concat(' ', normalize-space(@class), ' '), ' aclass ')]"))
equals("div/descendant::e[count(following-sibling::e) = 0]/descendant::*[@class and contains(concat(' ', normalize-space(@class), ' '), ' aclass ')]"))
expect_that(xpath('e:first-child'),
equals("*/*[name() = 'e' and (position() = 1)]"))
equals("e[count(preceding-sibling::*) = 0]"))
expect_that(xpath('e:last-child'),
equals("*/*[name() = 'e' and (position() = last())]"))
equals("e[count(following-sibling::*) = 0]"))
expect_that(xpath('e:first-of-type'),
equals("*/e[position() = 1]"))
equals("e[count(preceding-sibling::e) = 0]"))
expect_that(xpath('e:last-of-type'),
equals("*/e[position() = last()]"))
equals("e[count(following-sibling::e) = 0]"))
expect_that(xpath('e:only-child'),
equals("*/*[name() = 'e' and (last() = 1)]"))
equals("e[count(parent::*/child::*) = 1]"))
expect_that(xpath('e:only-of-type'),
equals("e[last() = 1]"))
equals("e[count(parent::*/child::e) = 1]"))
expect_that(xpath('e:empty'),
equals("e[not(*) and not(string-length())]"))
expect_that(xpath('e:EmPTY'),
Expand All @@ -72,19 +78,21 @@ test_that("translation from parsed objects to XPath works", {
expect_that(xpath('e#myid'),
equals("e[@id = 'myid']"))
expect_that(xpath('e:not(:nth-child(odd))'),
equals("e[not((position() -1) mod 2 = 0 and position() >= 1)]"))
equals("e[not(count(preceding-sibling::*) mod 2 = 0)]"))
expect_that(xpath('e:nOT(*)'),
equals("e[0]")) # never matches
expect_that(xpath('e f'),
equals("e/descendant-or-self::*/f"))
equals("e/descendant::f"))
expect_that(xpath('e > f'),
equals("e/f"))
expect_that(xpath('e + f'),
equals("e/following-sibling::*[name() = 'f' and (position() = 1)]"))
expect_that(xpath('e ~ f'),
equals("e/following-sibling::f"))
expect_that(xpath('e ~ f:nth-child(3)'),
equals("e/following-sibling::f[count(preceding-sibling::*) = 2]"))
expect_that(xpath('div#container p'),
equals("div[@id = 'container']/descendant-or-self::*/p"))
equals("div[@id = 'container']/descendant::p"))

# Invalid characters in XPath element names

Expand Down

2 comments on commit 5ed6033

@redapple
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@sjp , happy to see this implementation useful elsewhere!
It's still not merged in cssselect (lacking review and feedback currently).
Please let us know if this works in practice or if further work is needed.

@sjp
Copy link
Owner Author

@sjp sjp commented on 5ed6033 Aug 30, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure thing. Seemed to work well enough in my limited testing.

Fortunately it's not a common selector and not one I expect many users will encounter. If there are any issues I'll let you know and hopefully have a fix ready for you.

Please sign in to comment.