From 2722ae66fda4925ace25e640b2073be451a89f5d Mon Sep 17 00:00:00 2001 From: annbgn Date: Wed, 14 Jul 2021 00:46:45 +0300 Subject: [PATCH 1/6] add support for `[of S]`? part in nth-child's arguments --- cssselect/parser.py | 59 +++++++++++++++++++++++++++++++++-------- cssselect/xpath.py | 4 ++- tests/test_cssselect.py | 15 ++++++++++- 3 files changed, 65 insertions(+), 13 deletions(-) diff --git a/cssselect/parser.py b/cssselect/parser.py index 7125030..74e1501 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -161,16 +161,18 @@ def __init__(self, name, arguments): self.arguments = arguments def __repr__(self): - return '%s[::%s(%r)]' % ( - self.__class__.__name__, self.name, - [token.value for token in self.arguments]) + return "%s[::%s(%r)]" % ( + self.__class__.__name__, + self.name, + [token.value for token in self.arguments[0]], + ) def argument_types(self): return [token.type for token in self.arguments] def canonical(self): - args = ''.join(token.css() for token in self.arguments) - return '%s(%s)' % (self.name, args) + args = "".join(token.css() for token in self.arguments[0]) + return "%s(%s)" % (self.name, args) def specificity(self): a, b, c = self.selector.specificity() @@ -182,12 +184,27 @@ class Function(object): """ Represents selector:name(expr) """ - def __init__(self, selector, name, arguments): + + def __init__(self, selector, name, arguments, of_type=None): self.selector = selector self.name = ascii_lower(name) self.arguments = arguments + # for css4 :nth-child(An+B of Subselector) + try: + self.of_type = of_type[0] + except (IndexError, TypeError): + self.of_type = None + def __repr__(self): + if self.of_type: + return "%s[%r:%s(%r of %s)]" % ( + self.__class__.__name__, + self.selector, + self.name, + [token.value for token in self.arguments], + self.of_type.__repr__(), + ) return '%s[%r:%s(%r)]' % ( self.__class__.__name__, self.selector, self.name, [token.value for token in self.arguments]) @@ -539,7 +556,8 @@ def 
parse_simple_selector(stream, inside_negation=False): raise SelectorSyntaxError("Expected ')', got %s" % (next,)) result = Negation(result, argument) else: - result = Function(result, ident, parse_arguments(stream)) + arguments, of_type = parse_arguments(stream) + result = Function(result, ident, arguments, of_type) else: raise SelectorSyntaxError( "Expected selector, got %s" % (peek,)) @@ -554,16 +572,33 @@ def parse_arguments(stream): while 1: stream.skip_whitespace() next = stream.next() - if next.type in ('IDENT', 'STRING', 'NUMBER') or next in [ - ('DELIM', '+'), ('DELIM', '-')]: + if next == ("IDENT", "of"): + stream.skip_whitespace() + of_type = parse_of_type(stream) + return arguments, of_type + elif next.type in ("IDENT", "STRING", "NUMBER") or next in [ + ("DELIM", "+"), + ("DELIM", "-"), + ]: arguments.append(next) elif next == ('DELIM', ')'): - return arguments + return arguments, None else: raise SelectorSyntaxError( "Expected an argument, got %s" % (next,)) +def parse_of_type(stream): + subselector = "" + while 1: + next = stream.next() + if next == ("DELIM", ")"): + break + subselector += next.value + result = parse(subselector) + return result + + def parse_attrib(selector, stream): stream.skip_whitespace() attrib = stream.next_ident_or_star() @@ -620,6 +655,7 @@ def parse_series(tokens): for token in tokens: if token.type == 'STRING': raise ValueError('String tokens not allowed in series.') + s = ''.join(token.value for token in tokens).strip() if s == 'odd': return 2, 1 @@ -630,7 +666,7 @@ def parse_series(tokens): if 'n' not in s: # Just b return 0, int(s) - a, b = s.split('n', 1) + a, b = s.split("n", 1) if not a: a = 1 elif a == '-' or a == '+': @@ -641,6 +677,7 @@ def parse_series(tokens): b = 0 else: b = int(b) + return a, b diff --git a/cssselect/xpath.py b/cssselect/xpath.py index a8722bb..0feab6b 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -439,7 +439,9 @@ def xpath_nth_child_function(self, xpath, function, last=False, # 
`add_name_test` boolean is inverted and somewhat counter-intuitive: # # nth_of_type() calls nth_child(add_name_test=False) - if add_name_test: + if function.of_type: + nodetest = self.xpath(function.of_type.parsed_tree) + elif add_name_test: nodetest = '*' else: nodetest = '%s' % xpath.element diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index d6969f2..e40ca62 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -406,6 +406,19 @@ def xpath(css): "@hreflang = 'en' or starts-with(@hreflang, 'en-'))]") # --- nth-* and nth-last-* ------------------------------------- + assert ( + xpath("e:nth-child(2n+1 of S)") + == "e[count(preceding-sibling::S) mod 2 = 0]" + ) + assert ( + xpath("e:nth-of-type(2n+1 of S)") + == "e[count(preceding-sibling::S) mod 2 = 0]" + ) + assert ( + xpath('e:nth-child(2n+1 of li.important)') + == "e[count(preceding-sibling::li[@class and contains(concat(' ', normalize-space(@class), ' '), ' important ')]) mod 2 = 0]" + ) + assert xpath('e:nth-child(1)') == ( "e[count(preceding-sibling::*) = 0]") @@ -606,7 +619,7 @@ def xpath_five_attributes_pseudo(self, xpath): # functional pseudo-element: # element's attribute by name def xpath_attr_functional_pseudo_element(self, xpath, arguments): - attribute_name = arguments[0].value + attribute_name = arguments[0][0].value other = XPathExpr('@%s' % attribute_name, '', ) return xpath.join('/', other) From 772170f1bef6498b7021df7ea235513d6cee4c84 Mon Sep 17 00:00:00 2001 From: annbgn Date: Wed, 4 Aug 2021 00:28:49 +0300 Subject: [PATCH 2/6] run black --- cssselect/parser.py | 19 +++++++++++-------- cssselect/xpath.py | 2 +- tests/test_cssselect.py | 19 ++++++++----------- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/cssselect/parser.py b/cssselect/parser.py index 7493d02..4351f07 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -206,9 +206,12 @@ def __repr__(self): [token.value for token in self.arguments], self.of_type.__repr__(), ) - return 
'%s[%r:%s(%r)]' % ( - self.__class__.__name__, self.selector, self.name, - [token.value for token in self.arguments]) + return "%s[%r:%s(%r)]" % ( + self.__class__.__name__, + self.selector, + self.name, + [token.value for token in self.arguments], + ) def argument_types(self): return [token.type for token in self.arguments] @@ -620,7 +623,7 @@ def parse_arguments(stream): ("DELIM", "-"), ]: arguments.append(next) - elif next == ('DELIM', ')'): + elif next == ("DELIM", ")"): return arguments, None else: @@ -709,11 +712,11 @@ def parse_series(tokens): """ for token in tokens: - if token.type == 'STRING': - raise ValueError('String tokens not allowed in series.') + if token.type == "STRING": + raise ValueError("String tokens not allowed in series.") - s = ''.join(token.value for token in tokens).strip() - if s == 'odd': + s = "".join(token.value for token in tokens).strip() + if s == "odd": return 2, 1 elif s == "even": return 2, 0 diff --git a/cssselect/xpath.py b/cssselect/xpath.py index 7c5c2ef..1612ae4 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -447,7 +447,7 @@ def xpath_nth_child_function(self, xpath, function, last=False, add_name_test=Tr if function.of_type: nodetest = self.xpath(function.of_type.parsed_tree) elif add_name_test: - nodetest = '*' + nodetest = "*" else: nodetest = "%s" % xpath.element diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index b1806d1..b112e9a 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -403,16 +403,10 @@ def xpath(css): ) # --- nth-* and nth-last-* ------------------------------------- + assert xpath("e:nth-child(2n+1 of S)") == "e[count(preceding-sibling::S) mod 2 = 0]" + assert xpath("e:nth-of-type(2n+1 of S)") == "e[count(preceding-sibling::S) mod 2 = 0]" assert ( - xpath("e:nth-child(2n+1 of S)") - == "e[count(preceding-sibling::S) mod 2 = 0]" - ) - assert ( - xpath("e:nth-of-type(2n+1 of S)") - == "e[count(preceding-sibling::S) mod 2 = 0]" - ) - assert ( - 
xpath('e:nth-child(2n+1 of li.important)') + xpath("e:nth-child(2n+1 of li.important)") == "e[count(preceding-sibling::li[@class and contains(concat(' ', normalize-space(@class), ' '), ' important ')]) mod 2 = 0]" ) @@ -581,8 +575,11 @@ def xpath_five_attributes_pseudo(self, xpath): # element's attribute by name def xpath_attr_functional_pseudo_element(self, xpath, arguments): attribute_name = arguments[0][0].value - other = XPathExpr('@%s' % attribute_name, '', ) - return xpath.join('/', other) + other = XPathExpr( + "@%s" % attribute_name, + "", + ) + return xpath.join("/", other) # pseudo-element: # element's text() nodes From 058b6b5fde4edc9b43fcdfca766b02be1f191ce8 Mon Sep 17 00:00:00 2001 From: annbgn Date: Wed, 4 Aug 2021 08:28:11 +0300 Subject: [PATCH 3/6] fix lint --- tests/test_cssselect.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index b112e9a..36d0df2 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -406,8 +406,9 @@ def xpath(css): assert xpath("e:nth-child(2n+1 of S)") == "e[count(preceding-sibling::S) mod 2 = 0]" assert xpath("e:nth-of-type(2n+1 of S)") == "e[count(preceding-sibling::S) mod 2 = 0]" assert ( - xpath("e:nth-child(2n+1 of li.important)") - == "e[count(preceding-sibling::li[@class and contains(concat(' ', normalize-space(@class), ' '), ' important ')]) mod 2 = 0]" + xpath("e:nth-child(2n+1 of li.important)") == "e[count(preceding-sibling::li[@class" + " and contains(concat(' ', normalize-space(@class), ' '), ' important ')])" + " mod 2 = 0]" ) assert xpath("e:nth-child(1)") == ("e[count(preceding-sibling::*) = 0]") @@ -475,6 +476,9 @@ def xpath(css): assert xpath("e ~ f:nth-child(3)") == ( "e/following-sibling::f[count(preceding-sibling::*) = 2]" ) + assert xpath("e ~ f:nth-child(3 of S)") == ( + "e/following-sibling::f[count(preceding-sibling::S) = 2]" + ) assert xpath("div#container p") == ("div[@id = 
'container']/descendant-or-self::*/p") # Invalid characters in XPath element names From ae6e5c35cab478bfea9ef8cdfe7be3e3faba1892 Mon Sep 17 00:00:00 2001 From: annbgn Date: Wed, 18 Aug 2021 00:09:43 +0300 Subject: [PATCH 4/6] add new method to parse `of type` arguments --- cssselect/parser.py | 19 ++++++++++++++++--- tests/test_cssselect.py | 2 +- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/cssselect/parser.py b/cssselect/parser.py index 4351f07..5360fda 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -165,14 +165,14 @@ def __repr__(self): return "%s[::%s(%r)]" % ( self.__class__.__name__, self.name, - [token.value for token in self.arguments[0]], + [token.value for token in self.arguments], ) def argument_types(self): return [token.type for token in self.arguments] def canonical(self): - args = "".join(token.css() for token in self.arguments[0]) + args = "".join(token.css() for token in self.arguments) return "%s(%s)" % (self.name, args) def specificity(self): @@ -600,7 +600,7 @@ def parse_simple_selector(stream, inside_negation=False): selectors = parse_simple_selector_arguments(stream) result = Matching(result, selectors) else: - arguments, of_type = parse_arguments(stream) + arguments, of_type = parse_function_arguments(stream) result = Function(result, ident, arguments, of_type) else: raise SelectorSyntaxError("Expected selector, got %s" % (peek,)) @@ -610,6 +610,19 @@ def parse_simple_selector(stream, inside_negation=False): def parse_arguments(stream): + arguments = [] + while 1: + stream.skip_whitespace() + next = stream.next() + if next.type in ("IDENT", "STRING", "NUMBER") or next in [("DELIM", "+"), ("DELIM", "-")]: + arguments.append(next) + elif next == ("DELIM", ")"): + return arguments + else: + raise SelectorSyntaxError("Expected an argument, got %s" % (next,)) + + +def parse_function_arguments(stream): arguments = [] while 1: stream.skip_whitespace() diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py 
index 36d0df2..654908b 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -578,7 +578,7 @@ def xpath_five_attributes_pseudo(self, xpath): # functional pseudo-element: # element's attribute by name def xpath_attr_functional_pseudo_element(self, xpath, arguments): - attribute_name = arguments[0][0].value + attribute_name = arguments[0].value other = XPathExpr( "@%s" % attribute_name, "", From d2a2288fae54ca639d5f6ec947473d58284a403b Mon Sep 17 00:00:00 2001 From: Andrey Rahmatullin Date: Fri, 21 Oct 2022 17:14:25 +0500 Subject: [PATCH 5/6] Silence pylint too-many-locals --- pylintrc | 1 + 1 file changed, 1 insertion(+) diff --git a/pylintrc b/pylintrc index 7da580b..7e27e38 100644 --- a/pylintrc +++ b/pylintrc @@ -25,6 +25,7 @@ disable=assignment-from-no-return, too-many-branches, too-many-function-args, too-many-lines, + too-many-locals, too-many-public-methods, too-many-statements, undefined-variable, From 44aeeddc68a536618ca6270ee5e77aa0c5dd4aac Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Tue, 25 Oct 2022 16:37:42 +0600 Subject: [PATCH 6/6] Add typing for new code. 
--- cssselect/parser.py | 27 ++++++++++++++++++--------- cssselect/xpath.py | 2 +- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/cssselect/parser.py b/cssselect/parser.py index a8d91dd..9b1c71e 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -190,15 +190,22 @@ class Function: Represents selector:name(expr) """ - def __init__(self, selector: Tree, name: str, arguments: Sequence["Token"], of_type=None) -> None: + def __init__( + self, + selector: Tree, + name: str, + arguments: Sequence["Token"], + of_type: Optional[List[Selector]] = None, + ) -> None: self.selector = selector self.name = ascii_lower(name) self.arguments = arguments # for css4 :nth-child(An+B of Subselector) - try: + self.of_type: Optional[Selector] + if of_type: self.of_type = of_type[0] - except (IndexError, TypeError): + else: self.of_type = None def __repr__(self) -> str: @@ -709,8 +716,8 @@ def parse_simple_selector( selectors = parse_simple_selector_arguments(stream) result = SpecificityAdjustment(result, selectors) else: - arguments, of_type = parse_function_arguments(stream) - result = Function(result, ident, arguments, of_type) + fn_arguments, of_type = parse_function_arguments(stream) + result = Function(result, ident, fn_arguments, of_type) else: raise SelectorSyntaxError("Expected selector, got %s" % (peek,)) if len(stream.used) == selector_start: @@ -731,8 +738,10 @@ def parse_arguments(stream: "TokenStream") -> List["Token"]: raise SelectorSyntaxError("Expected an argument, got %s" % (next,)) -def parse_function_arguments(stream): - arguments = [] +def parse_function_arguments( + stream: "TokenStream", +) -> Tuple[List["Token"], Optional[List[Selector]]]: + arguments: List["Token"] = [] while 1: stream.skip_whitespace() next = stream.next() @@ -797,13 +806,13 @@ def parse_simple_selector_arguments(stream: "TokenStream") -> List[Tree]: return arguments -def parse_of_type(stream): +def parse_of_type(stream: "TokenStream") -> List[Selector]: subselector = "" 
while 1: next = stream.next() if next == ("DELIM", ")"): break - subselector += next.value + subselector += typing.cast(str, next.value) result = parse(subselector) return result diff --git a/cssselect/xpath.py b/cssselect/xpath.py index 8b31664..05cf46a 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -516,7 +516,7 @@ def xpath_nth_child_function( # # nth_of_type() calls nth_child(add_name_test=False) if function.of_type: - nodetest = self.xpath(function.of_type.parsed_tree) + nodetest = str(self.xpath(function.of_type.parsed_tree)) elif add_name_test: nodetest = "*" else: