Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add suppport for [of S]? part in nth-child's arguments #120

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
48 changes: 42 additions & 6 deletions cssselect/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,14 +165,14 @@ def __repr__(self):
return "%s[::%s(%r)]" % (
self.__class__.__name__,
self.name,
[token.value for token in self.arguments],
[token.value for token in self.arguments[0]],
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This line rang a bell to me. With no other changes to the class, it means that the arguments argument is now expected to be formatted differently, which breaks backward compatibility. Consider the following diff:

diff --git cssselect/parser.py cssselect/parser.py
index 4351f07..48e7604 100644
--- cssselect/parser.py
+++ cssselect/parser.py
@@ -160,6 +160,7 @@ class FunctionalPseudoElement(object):
     def __init__(self, name, arguments):
         self.name = ascii_lower(name)
         self.arguments = arguments
+        print("arguments:", arguments)
 
     def __repr__(self):
         return "%s[::%s(%r)]" % (

Then at the current master branch (9edc6c3):

In [1]: from cssselect import parse

In [2]: p = parse("a::asdf(arg1)")
arguments: [<IDENT 'arg1' at 8>]

While at the current add_support_for_of_s_part_in_nth_child branch (058b6b5):

In [1]: from cssselect import parse

In [2]: p = parse("a::asdf(arg1)")
arguments: ([<IDENT 'arg1' at 8>], None)

Could you explain the rationale of this change? Thanks!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're right, the backward compatibility breaks, because I changed signature of method parse_arguments which is used both for functions and functional pseudo elements.
Now I splitted parsing arguments into separate methods, so nothing will break
Nice catch!

)

def argument_types(self):
return [token.type for token in self.arguments]

def canonical(self):
args = "".join(token.css() for token in self.arguments)
args = "".join(token.css() for token in self.arguments[0])
return "%s(%s)" % (self.name, args)

def specificity(self):
Expand All @@ -186,12 +186,26 @@ class Function(object):
Represents selector:name(expr)
"""

def __init__(self, selector, name, arguments):
def __init__(self, selector, name, arguments, of_type=None):
self.selector = selector
self.name = ascii_lower(name)
self.arguments = arguments

# for css4 :nth-child(An+B of Subselector)
try:
self.of_type = of_type[0]
except (IndexError, TypeError):
self.of_type = None

def __repr__(self):
if self.of_type:
return "%s[%r:%s(%r of %s)]" % (
self.__class__.__name__,
self.selector,
self.name,
[token.value for token in self.arguments],
self.of_type.__repr__(),
)
return "%s[%r:%s(%r)]" % (
self.__class__.__name__,
self.selector,
Expand Down Expand Up @@ -586,7 +600,8 @@ def parse_simple_selector(stream, inside_negation=False):
selectors = parse_simple_selector_arguments(stream)
result = Matching(result, selectors)
else:
result = Function(result, ident, parse_arguments(stream))
arguments, of_type = parse_arguments(stream)
result = Function(result, ident, arguments, of_type)
else:
raise SelectorSyntaxError("Expected selector, got %s" % (peek,))
if len(stream.used) == selector_start:
Expand All @@ -599,10 +614,18 @@ def parse_arguments(stream):
while 1:
stream.skip_whitespace()
next = stream.next()
if next.type in ("IDENT", "STRING", "NUMBER") or next in [("DELIM", "+"), ("DELIM", "-")]:
if next == ("IDENT", "of"):
stream.skip_whitespace()
of_type = parse_of_type(stream)
return arguments, of_type
elif next.type in ("IDENT", "STRING", "NUMBER") or next in [
("DELIM", "+"),
("DELIM", "-"),
]:
arguments.append(next)
elif next == ("DELIM", ")"):
return arguments
return arguments, None

else:
raise SelectorSyntaxError("Expected an argument, got %s" % (next,))

Expand All @@ -629,6 +652,17 @@ def parse_simple_selector_arguments(stream):
return arguments


def parse_of_type(stream):
subselector = ""
while 1:
next = stream.next()
if next == ("DELIM", ")"):
break
subselector += next.value
result = parse(subselector)
return result


def parse_attrib(selector, stream):
stream.skip_whitespace()
attrib = stream.next_ident_or_star()
Expand Down Expand Up @@ -680,6 +714,7 @@ def parse_series(tokens):
for token in tokens:
if token.type == "STRING":
raise ValueError("String tokens not allowed in series.")

s = "".join(token.value for token in tokens).strip()
if s == "odd":
return 2, 1
Expand All @@ -701,6 +736,7 @@ def parse_series(tokens):
b = 0
else:
b = int(b)

return a, b


Expand Down
4 changes: 3 additions & 1 deletion cssselect/xpath.py
Original file line number Diff line number Diff line change
Expand Up @@ -444,7 +444,9 @@ def xpath_nth_child_function(self, xpath, function, last=False, add_name_test=Tr
# `add_name_test` boolean is inverted and somewhat counter-intuitive:
#
# nth_of_type() calls nth_child(add_name_test=False)
if add_name_test:
if function.of_type:
nodetest = self.xpath(function.of_type.parsed_tree)
elif add_name_test:
nodetest = "*"
else:
nodetest = "%s" % xpath.element
Expand Down
13 changes: 12 additions & 1 deletion tests/test_cssselect.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,14 @@ def xpath(css):
)

# --- nth-* and nth-last-* -------------------------------------
assert xpath("e:nth-child(2n+1 of S)") == "e[count(preceding-sibling::S) mod 2 = 0]"
assert xpath("e:nth-of-type(2n+1 of S)") == "e[count(preceding-sibling::S) mod 2 = 0]"
assert (
xpath("e:nth-child(2n+1 of li.important)") == "e[count(preceding-sibling::li[@class"
" and contains(concat(' ', normalize-space(@class), ' '), ' important ')])"
" mod 2 = 0]"
)

assert xpath("e:nth-child(1)") == ("e[count(preceding-sibling::*) = 0]")

# always true
Expand Down Expand Up @@ -468,6 +476,9 @@ def xpath(css):
assert xpath("e ~ f:nth-child(3)") == (
"e/following-sibling::f[count(preceding-sibling::*) = 2]"
)
assert xpath("e ~ f:nth-child(3 of S)") == (
"e/following-sibling::f[count(preceding-sibling::S) = 2]"
)
assert xpath("div#container p") == ("div[@id = 'container']/descendant-or-self::*/p")

# Invalid characters in XPath element names
Expand Down Expand Up @@ -567,7 +578,7 @@ def xpath_five_attributes_pseudo(self, xpath):
# functional pseudo-element:
# element's attribute by name
def xpath_attr_functional_pseudo_element(self, xpath, arguments):
attribute_name = arguments[0].value
attribute_name = arguments[0][0].value
other = XPathExpr(
"@%s" % attribute_name,
"",
Expand Down