From c44c595ade63e274ba2e17a08b79ec06747032d5 Mon Sep 17 00:00:00 2001 From: annbgn Date: Mon, 21 Jun 2021 13:37:17 +0300 Subject: [PATCH 01/11] add support for :has() --- cssselect/parser.py | 45 +++++++++++++++++++++++++++++++++++++++++ cssselect/xpath.py | 6 ++++++ tests/test_cssselect.py | 17 ++++++++++++++++ 3 files changed, 68 insertions(+) diff --git a/cssselect/parser.py b/cssselect/parser.py index 7125030..e28ad83 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -250,6 +250,30 @@ def specificity(self): return a1 + a2, b1 + b2, c1 + c2 +class Relation(object): + """ + Represents selector:has(subselector) + """ + def __init__(self, selector, subselector): + self.selector = selector + self.subselector = subselector + + def __repr__(self): + return '%s[%r:has(%r)]' % ( + self.__class__.__name__, self.selector, self.subselector) + + def canonical(self): + subsel = self.subselector.canonical() + if len(subsel) > 1: + subsel = subsel.lstrip('*') + return '%s:has(%s)' % (self.selector.canonical(), subsel) + + def specificity(self): + a1, b1, c1 = self.selector.specificity() + a2, b2, c2 = self.subselector.specificity() + return a1 + a2, b1 + b2, c1 + c2 + + class Attrib(object): """ Represents selector[namespace|attrib operator value] @@ -538,6 +562,9 @@ def parse_simple_selector(stream, inside_negation=False): if next != ('DELIM', ')'): raise SelectorSyntaxError("Expected ')', got %s" % (next,)) result = Negation(result, argument) + elif ident.lower() == 'has': + arguments = parse_relative_selector(stream) + result = Relation(result, arguments) else: result = Function(result, ident, parse_arguments(stream)) else: @@ -564,6 +591,24 @@ def parse_arguments(stream): "Expected an argument, got %s" % (next,)) +def parse_relative_selector(stream): + arguments = [] + stream.skip_whitespace() + next = stream.next() + if next in [('DELIM', '+'), ('DELIM', '-'), ('DELIM', '>'), ('DELIM', '~')]: + arguments.append(next) + while 1: + stream.skip_whitespace() + next = stream.next() + if next.type in ('IDENT', 'STRING', 'NUMBER'): + arguments.append(Element(element=next.value)) + elif next == ('DELIM', ')'): + return arguments + else: + raise SelectorSyntaxError( + "Expected an argument, got %s" % (next,)) + + def parse_attrib(selector, stream): stream.skip_whitespace() attrib = stream.next_ident_or_star() diff --git a/cssselect/xpath.py b/cssselect/xpath.py index a8722bb..a6f600f 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -272,6 +272,12 @@ def xpath_negation(self, negation): else: return xpath.add_condition('0') + def xpath_relation(self, relation): + xpath = self.xpath(relation.selector) + combinator, subselector, *_ = relation.subselector + method = getattr(self, 'xpath_%s_combinator' % self.combinator_mapping[combinator.value]) + return method(xpath, self.xpath(subselector)) + def xpath_function(self, function): """Translate a functional pseudo-class.""" method = 'xpath_%s_function' % function.name.replace('-', '_') diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index d6969f2..6611e86 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -266,6 +266,13 @@ def specificity(css): assert specificity(':not(:empty)') == (0, 1, 0) assert specificity(':not(#foo)') == (1, 0, 0) + # assert specificity(':has(*)') == (0, 0, 0) + # assert specificity(':has(foo)') == (0, 0, 1) + # assert specificity(':has(.foo)') == (0, 1, 0) + # assert specificity(':has([foo])') == (0, 1, 0) + # assert specificity(':has(:empty)') == (0, 1, 0) + # assert specificity(':has(#foo)') == (1, 0, 0) + assert specificity('foo:empty') == (0, 1, 1) assert specificity('foo:before') == (0, 0, 2) assert specificity('foo::before') == (0, 0, 2) @@ -300,6 +307,12 @@ def css2css(css, res=None): css2css(':not(*[foo])', ':not([foo])') css2css(':not(:empty)') css2css(':not(#foo)') + # css2css(':has(*)') + # css2css(':has(foo)') + # css2css(':has(*.foo)', ':has(.foo)') + # css2css(':has(*[foo])', ':has([foo])') + # css2css(':has(:empty)') + # css2css(':has(#foo)') css2css('foo:empty') css2css('foo::before') css2css('foo:empty::before') @@ -492,6 +505,7 @@ def xpath(css): "e[not(count(preceding-sibling::*) mod 2 = 0)]") assert xpath('e:nOT(*)') == ( "e[0]") # never matches + assert xpath('e:has(> f)') == 'e/f' assert xpath('e f') == ( "e/descendant-or-self::*/f") assert xpath('e > f') == ( @@ -863,6 +877,9 @@ def pcss(main, *selectors, **kwargs): assert pcss('ol :Not(li[class])') == [ 'first-li', 'second-li', 'li-div', 'fifth-li', 'sixth-li', 'seventh-li'] + # assert pcss('link:has(*)') == [] + # assert pcss('link:has([href])') == ['link-href'] + # assert pcss('ol:has(div)') == ['first-ol'] assert pcss('ol.a.b.c > li.c:nth-child(3)') == ['third-li'] # Invalid characters in XPath element names, should not crash From c4ef8c892dbae8f25f92ded262300c0faabf3abf Mon Sep 17 00:00:00 2001 From: annbgn <47499658+annbgn@users.noreply.github.com> Date: Tue, 22 Jun 2021 23:09:31 +0300 Subject: [PATCH 02/11] allow :has() arguments start not solely from a combinator Co-authored-by: Eugenio Lacuesta <1731933+elacuesta@users.noreply.github.com> --- cssselect/parser.py | 2 ++ cssselect/xpath.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/cssselect/parser.py b/cssselect/parser.py index e28ad83..ac70f00 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -597,6 +597,8 @@ def parse_relative_selector(stream): next = stream.next() if next in [('DELIM', '+'), ('DELIM', '-'), ('DELIM', '>'), ('DELIM', '~')]: arguments.append(next) + elif next.type in ('IDENT', 'STRING', 'NUMBER'): + arguments.append(Element(element=next.value)) while 1: stream.skip_whitespace() next = stream.next() diff --git a/cssselect/xpath.py b/cssselect/xpath.py index a6f600f..0673b38 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -274,7 +274,7 @@ def xpath_negation(self, negation): def xpath_relation(self, relation): xpath = self.xpath(relation.selector) - combinator, subselector, *_ = relation.subselector + combinator, subselector = relation.subselector method = getattr(self, 'xpath_%s_combinator' % self.combinator_mapping[combinator.value]) return method(xpath, self.xpath(subselector)) From 7bcc7e0f8138413e2a4632d01bd30ff5a53c02d6 Mon Sep 17 00:00:00 2001 From: annbgn Date: Tue, 6 Jul 2021 01:37:52 +0300 Subject: [PATCH 03/11] expand tests on relative selectors --- cssselect/parser.py | 4 +++- cssselect/xpath.py | 39 ++++++++++++++++++++++++++++++++++----- tests/test_cssselect.py | 17 +++++++++-------- 3 files changed, 46 insertions(+), 14 deletions(-) diff --git a/cssselect/parser.py b/cssselect/parser.py index ac70f00..92d3dfb 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -270,7 +270,9 @@ def canonical(self): def specificity(self): a1, b1, c1 = self.selector.specificity() - a2, b2, c2 = self.subselector.specificity() + a2 = b2 = c2 = 0 + if self.subselector: + a2, b2, c2 = self.subselector[-1].specificity() return a1 + a2, b1 + b2, c1 + c2 diff --git a/cssselect/xpath.py b/cssselect/xpath.py index 0673b38..f60f83d 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -14,6 +14,7 @@ import sys import re +import copy from cssselect.parser import parse, parse_series, SelectorError @@ -76,13 +77,13 @@ def add_star_prefix(self): """ self.path += '*/' - def join(self, combiner, other): + def join(self, combiner, other, closing_combiner=None): path = _unicode(self) + combiner # Any "star prefix" is redundant when joining. if other.path != '*/': path += other.path self.path = path - self.element = other.element + self.element = other.element + closing_combiner if closing_combiner else other.element self.condition = other.condition return self @@ -274,9 +275,14 @@ def xpath_negation(self, negation): def xpath_relation(self, relation): xpath = self.xpath(relation.selector) - combinator, subselector = relation.subselector - method = getattr(self, 'xpath_%s_combinator' % self.combinator_mapping[combinator.value]) - return method(xpath, self.xpath(subselector)) + combinator, *subselector = relation.subselector + if not subselector: + combinator.value = ' ' + right = self.xpath(combinator) + else: + right = self.xpath(subselector[0]) + method = getattr(self, 'xpath_relation_%s_combinator' % self.combinator_mapping[combinator.value]) + return method(xpath, right) def xpath_function(self, function): """Translate a functional pseudo-class.""" @@ -375,6 +381,29 @@ def xpath_indirect_adjacent_combinator(self, left, right): """right is a sibling after left, immediately or not""" return left.join('/following-sibling::', right) + def xpath_relation_descendant_combinator(self, left, right): + """right is a child, grand-child or further descendant of left; select left""" + return left.join('/descendant-or-self::', right, closing_combiner='/ancestor-or-self::' + left.element) + + def xpath_relation_child_combinator(self, left, right): + """right is an immediate child of left; select left""" + return left.join('[./', right, closing_combiner=']') + + def xpath_relation_direct_adjacent_combinator(self, left, right): + """right is a sibling immediately after left; select left""" + left_copy = copy.copy(left) + xpath = left.join('/following-sibling::', right) + xpath.add_name_test() + xpath.add_condition('position() = 1') + + xpath = xpath.join('/preceding-sibling::', left_copy) + xpath.add_name_test() + return xpath.add_condition('position() = 1') + + def xpath_relation_indirect_adjacent_combinator(self, left, right): + """right is a sibling after left, immediately or not; select left""" + return left.join('/following-sibling::', right, closing_combiner='/preceding-sibling::'+left.element) + # Function: dispatch by function/pseudo-class name diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index 6611e86..fd6c06d 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -266,12 +266,10 @@ def specificity(css): assert specificity(':not(:empty)') == (0, 1, 0) assert specificity(':not(#foo)') == (1, 0, 0) - # assert specificity(':has(*)') == (0, 0, 0) - # assert specificity(':has(foo)') == (0, 0, 1) - # assert specificity(':has(.foo)') == (0, 1, 0) - # assert specificity(':has([foo])') == (0, 1, 0) - # assert specificity(':has(:empty)') == (0, 1, 0) - # assert specificity(':has(#foo)') == (1, 0, 0) + assert specificity(':has(*)') == (0, 0, 0) + assert specificity(':has(foo)') == (0, 0, 1) + assert specificity(':has(> foo)') == (0, 0, 1) + assert specificity('foo:empty') == (0, 1, 1) assert specificity('foo:before') == (0, 0, 2) @@ -504,8 +502,11 @@ def xpath(css): assert xpath('e:not(:nth-child(odd))') == ( "e[not(count(preceding-sibling::*) mod 2 = 0)]") assert xpath('e:nOT(*)') == ( - "e[0]") # never matches - assert xpath('e:has(> f)') == 'e/f' + "e[0]") # never matches + assert xpath('e:has(> f)') == 'e[./f]' + assert xpath('e:has(f)') == 'e/descendant-or-self::f/ancestor-or-self::e' + assert xpath('e:has(~ f)') == 'e/following-sibling::f/preceding-sibling::e' + assert xpath('e:has(+ f)') == "e/following-sibling::*[(name() = 'f') and (position() = 1)]/preceding-sibling::*[(name() = 'e') and (position() = 1)]" assert xpath('e f') == ( "e/descendant-or-self::*/f") assert xpath('e > f') == ( From 7b03ae2184338664e585d7b5d39c9fec9f47c298 Mon Sep 17 00:00:00 2001 From: annbgn Date: Mon, 12 Jul 2021 23:38:01 +0300 Subject: [PATCH 04/11] run formatter --- cssselect/parser.py | 25 ++++++++++++++----------- cssselect/xpath.py | 34 +++++++++++++++++++++++----------- 2 files changed, 37 insertions(+), 22 deletions(-) diff --git a/cssselect/parser.py b/cssselect/parser.py index 92d3dfb..11a47bb 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -254,19 +254,23 @@ class Relation(object): """ Represents selector:has(subselector) """ + def __init__(self, selector, subselector): self.selector = selector self.subselector = subselector def __repr__(self): - return '%s[%r:has(%r)]' % ( - self.__class__.__name__, self.selector, self.subselector) + return "%s[%r:has(%r)]" % ( + self.__class__.__name__, + self.selector, + self.subselector, + ) def canonical(self): subsel = self.subselector.canonical() if len(subsel) > 1: - subsel = subsel.lstrip('*') - return '%s:has(%s)' % (self.selector.canonical(), subsel) + subsel = subsel.lstrip("*") + return "%s:has(%s)" % (self.selector.canonical(), subsel) def specificity(self): a1, b1, c1 = self.selector.specificity() @@ -564,7 +568,7 @@ def parse_simple_selector(stream, inside_negation=False): if next != ('DELIM', ')'): raise SelectorSyntaxError("Expected ')', got %s" % (next,)) result = Negation(result, argument) - elif ident.lower() == 'has': + elif ident.lower() == "has": arguments = parse_relative_selector(stream) result = Relation(result, arguments) else: @@ -586,25 +590,24 @@ def parse_arguments(stream): if next.type in ('IDENT', 'STRING', 'NUMBER') or next in [ ('DELIM', '+'), ('DELIM', '-')]: arguments.append(next) - elif next == ('DELIM', ')'): + elif next == ("DELIM", ")"): return arguments else: - raise SelectorSyntaxError( - "Expected an argument, got %s" % (next,)) + raise SelectorSyntaxError("Expected an argument, got %s" % (next,)) def parse_relative_selector(stream): arguments = [] stream.skip_whitespace() next = stream.next() - if next in [('DELIM', '+'), ('DELIM', '-'), ('DELIM', '>'), ('DELIM', '~')]: + if next in [("DELIM", "+"), ("DELIM", "-"), ("DELIM", ">"), ("DELIM", "~")]: arguments.append(next) - elif next.type in ('IDENT', 'STRING', 'NUMBER'): + elif next.type in ("IDENT", "STRING", "NUMBER"): arguments.append(Element(element=next.value)) while 1: stream.skip_whitespace() next = stream.next() - if next.type in ('IDENT', 'STRING', 'NUMBER'): + if next.type in ("IDENT", "STRING", "NUMBER"): arguments.append(Element(element=next.value)) elif next == ('DELIM', ')'): return arguments diff --git a/cssselect/xpath.py b/cssselect/xpath.py index f60f83d..05e9be2 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -83,7 +83,9 @@ def join(self, combiner, other, closing_combiner=None): if other.path != '*/': path += other.path self.path = path - self.element = other.element + closing_combiner if closing_combiner else other.element + self.element = ( + other.element + closing_combiner if closing_combiner else other.element + ) self.condition = other.condition return self @@ -277,11 +279,14 @@ def xpath_relation(self, relation): xpath = self.xpath(relation.selector) combinator, *subselector = relation.subselector if not subselector: - combinator.value = ' ' + combinator.value = " " right = self.xpath(combinator) else: right = self.xpath(subselector[0]) - method = getattr(self, 'xpath_relation_%s_combinator' % self.combinator_mapping[combinator.value]) + method = getattr( + self, + "xpath_relation_%s_combinator" % self.combinator_mapping[combinator.value], + ) return method(xpath, right) def xpath_function(self, function): @@ -383,27 +388,34 @@ def xpath_indirect_adjacent_combinator(self, left, right): def xpath_relation_descendant_combinator(self, left, right): """right is a child, grand-child or further descendant of left; select left""" - return left.join('/descendant-or-self::', right, closing_combiner='/ancestor-or-self::' + left.element) + return left.join( + "/descendant-or-self::", + right, + closing_combiner="/ancestor-or-self::" + left.element, + ) def xpath_relation_child_combinator(self, left, right): """right is an immediate child of left; select left""" - return left.join('[./', right, closing_combiner=']') + return left.join("[./", right, closing_combiner="]") def xpath_relation_direct_adjacent_combinator(self, left, right): """right is a sibling immediately after left; select left""" left_copy = copy.copy(left) - xpath = left.join('/following-sibling::', right) + xpath = left.join("/following-sibling::", right) xpath.add_name_test() - xpath.add_condition('position() = 1') + xpath.add_condition("position() = 1") - xpath = xpath.join('/preceding-sibling::', left_copy) + xpath = xpath.join("/preceding-sibling::", left_copy) xpath.add_name_test() - return xpath.add_condition('position() = 1') + return xpath.add_condition("position() = 1") def xpath_relation_indirect_adjacent_combinator(self, left, right): """right is a sibling after left, immediately or not; select left""" - return left.join('/following-sibling::', right, closing_combiner='/preceding-sibling::'+left.element) - + return left.join( + "/following-sibling::", + right, + closing_combiner="/preceding-sibling::" + left.element, + ) # Function: dispatch by function/pseudo-class name From 62f737bc2d46b08cd5c9084992d6a3d757c6c4df Mon Sep 17 00:00:00 2001 From: annbgn Date: Fri, 16 Jul 2021 09:34:39 +0300 Subject: [PATCH 05/11] fix review remarks --- cssselect/parser.py | 5 ++++- cssselect/xpath.py | 12 ++---------- tests/test_cssselect.py | 16 +++++----------- 3 files changed, 11 insertions(+), 22 deletions(-) diff --git a/cssselect/parser.py b/cssselect/parser.py index 11a47bb..77033a3 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -267,7 +267,10 @@ def __repr__(self): ) def canonical(self): - subsel = self.subselector.canonical() + if not self.subselector: + subsel = '*' + else: + subsel = self.subselector[0].canonical() if len(subsel) > 1: subsel = subsel.lstrip("*") return "%s:has(%s)" % (self.selector.canonical(), subsel) diff --git a/cssselect/xpath.py b/cssselect/xpath.py index 05e9be2..c6c2a4a 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -388,11 +388,7 @@ def xpath_indirect_adjacent_combinator(self, left, right): def xpath_relation_descendant_combinator(self, left, right): """right is a child, grand-child or further descendant of left; select left""" - return left.join( - "/descendant-or-self::", - right, - closing_combiner="/ancestor-or-self::" + left.element, - ) + return left.join("[descendant::", right, closing_combiner="]") def xpath_relation_child_combinator(self, left, right): """right is an immediate child of left; select left""" @@ -411,11 +407,7 @@ def xpath_relation_direct_adjacent_combinator(self, left, right): def xpath_relation_indirect_adjacent_combinator(self, left, right): """right is a sibling after left, immediately or not; select left""" - return left.join( - "/following-sibling::", - right, - closing_combiner="/preceding-sibling::" + left.element, - ) + return left.join("[following-sibling::", right, closing_combiner="]") # Function: dispatch by function/pseudo-class name diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index fd6c06d..e4c4b58 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -305,12 +305,8 @@ def css2css(css, res=None): css2css(':not(*[foo])', ':not([foo])') css2css(':not(:empty)') css2css(':not(#foo)') - # css2css(':has(*)') - # css2css(':has(foo)') - # css2css(':has(*.foo)', ':has(.foo)') - # css2css(':has(*[foo])', ':has([foo])') - # css2css(':has(:empty)') - # css2css(':has(#foo)') + css2css(':has(*)') + css2css(':has(foo)') css2css('foo:empty') css2css('foo::before') css2css('foo:empty::before') @@ -504,8 +500,8 @@ def xpath(css): assert xpath('e:nOT(*)') == ( "e[0]") # never matches assert xpath('e:has(> f)') == 'e[./f]' - assert xpath('e:has(f)') == 'e/descendant-or-self::f/ancestor-or-self::e' - assert xpath('e:has(~ f)') == 'e/following-sibling::f/preceding-sibling::e' + assert xpath('e:has(f)') == 'e[descendant::f]' + assert xpath('e:has(~ f)') == 'e[following-sibling::f]' assert xpath('e:has(+ f)') == "e/following-sibling::*[(name() = 'f') and (position() = 1)]/preceding-sibling::*[(name() = 'e') and (position() = 1)]" assert xpath('e f') == ( "e/descendant-or-self::*/f") @@ -878,9 +874,7 @@ def pcss(main, *selectors, **kwargs): assert pcss('ol :Not(li[class])') == [ 'first-li', 'second-li', 'li-div', 'fifth-li', 'sixth-li', 'seventh-li'] - # assert pcss('link:has(*)') == [] - # assert pcss('link:has([href])') == ['link-href'] - # assert pcss('ol:has(div)') == ['first-ol'] + assert pcss('ol:has(div)') == ['first-ol'] assert pcss('ol.a.b.c > li.c:nth-child(3)') == ['third-li'] # Invalid characters in XPath element names, should not crash From 47f3c11c552c9d8f4d13d63880cafc0a386ddcfd Mon Sep 17 00:00:00 2001 From: annbgn Date: Fri, 16 Jul 2021 20:23:23 +0300 Subject: [PATCH 06/11] fix lint --- tests/test_cssselect.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index 3c1240c..dd099c2 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -270,9 +270,9 @@ def specificity(css): assert specificity(':not(:empty)') == (0, 1, 0) assert specificity(':not(#foo)') == (1, 0, 0) - assert specificity(':has(*)') == (0, 0, 0) - assert specificity(':has(foo)') == (0, 0, 1) - assert specificity(':has(> foo)') == (0, 0, 1) + assert specificity(":has(*)") == (0, 0, 0) + assert specificity(":has(foo)") == (0, 0, 1) + assert specificity(":has(> foo)") == (0, 0, 1) assert specificity(':is(.foo, #bar)') == (1, 0, 0) assert specificity(':is(:hover, :visited)') == (0, 1, 0) @@ -311,8 +311,8 @@ def css2css(css, res=None): css2css(':not(*[foo])', ':not([foo])') css2css(':not(:empty)') css2css(':not(#foo)') - css2css(':has(*)') - css2css(':has(foo)') + css2css(":has(*)") + css2css(":has(foo)") css2css(':is(#bar, .foo)') css2css(':is(:focused, :visited)') css2css('foo:empty') @@ -511,10 +511,14 @@ def xpath(css): "e[not(count(preceding-sibling::*) mod 2 = 0)]") assert xpath('e:nOT(*)') == ( "e[0]") # never matches - assert xpath('e:has(> f)') == 'e[./f]' - assert xpath('e:has(f)') == 'e[descendant::f]' - assert xpath('e:has(~ f)') == 'e[following-sibling::f]' - assert xpath('e:has(+ f)') == "e/following-sibling::*[(name() = 'f') and (position() = 1)]/preceding-sibling::*[(name() = 'e') and (position() = 1)]" + assert xpath("e:has(> f)") == "e[./f]" + assert xpath("e:has(f)") == "e[descendant::f]" + assert xpath("e:has(~ f)") == "e[following-sibling::f]" + assert ( + xpath("e:has(+ f)") + == "e/following-sibling::*[(name() = 'f') and (position() = 1)]" + "/preceding-sibling::*[(name() = 'e') and (position() = 1)]" + ) assert xpath('e f') == ( "e/descendant-or-self::*/f") assert xpath('e > f') == ( @@ -886,7 +890,7 @@ def pcss(main, *selectors, **kwargs): assert pcss('ol :Not(li[class])') == [ 'first-li', 'second-li', 'li-div', 'fifth-li', 'sixth-li', 'seventh-li'] - assert pcss('ol:has(div)') == ['first-ol'] + assert pcss("ol:has(div)") == ["first-ol"] assert pcss(':is(#first-li, #second-li)') == [ 'first-li', 'second-li'] assert pcss('a:is(#name-anchor, #tag-anchor)') == [ From b64eacf664280bd720c68d7c8a55688a356d722a Mon Sep 17 00:00:00 2001 From: annbgn Date: Wed, 21 Jul 2021 13:27:39 +0300 Subject: [PATCH 07/11] simplify test + run ```darker master HEAD``` --- cssselect/parser.py | 29 +++++++++++++++-------------- cssselect/xpath.py | 20 ++++++-------------- tests/test_cssselect.py | 3 +-- 3 files changed, 22 insertions(+), 30 deletions(-) diff --git a/cssselect/parser.py b/cssselect/parser.py index d7770a1..9d48dc7 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -268,7 +268,7 @@ def __repr__(self): def canonical(self): if not self.subselector: - subsel = '*' + subsel = "*" else: subsel = self.subselector[0].canonical() if len(subsel) > 1: @@ -287,22 +287,24 @@ class Matching(object): """ Represents selector:is(selector_list) """ + def __init__(self, selector, selector_list): self.selector = selector self.selector_list = selector_list def __repr__(self): - return '%s[%r:is(%s)]' % ( - self.__class__.__name__, self.selector, ", ".join( - map(repr, self.selector_list))) + return "%s[%r:is(%s)]" % ( + self.__class__.__name__, + self.selector, + ", ".join(map(repr, self.selector_list)), + ) def canonical(self): selector_arguments = [] for s in self.selector_list: selarg = s.canonical() - selector_arguments.append(selarg.lstrip('*')) - return '%s:is(%s)' % (self.selector.canonical(), - ", ".join(map(str, selector_arguments))) + selector_arguments.append(selarg.lstrip("*")) + return "%s:is(%s)" % (self.selector.canonical(), ", ".join(map(str, selector_arguments))) def specificity(self): return max([x.specificity() for x in self.selector_list]) @@ -600,7 +602,7 @@ def parse_simple_selector(stream, inside_negation=False): elif ident.lower() == "has": arguments = parse_relative_selector(stream) result = Relation(result, arguments) - elif ident.lower() in ('matches', 'is'): + elif ident.lower() in ("matches", "is"): selectors = parse_simple_selector_arguments(stream) result = Matching(result, selectors) else: @@ -654,20 +656,19 @@ def parse_simple_selector_arguments(stream): result, pseudo_element = parse_simple_selector(stream, True) if pseudo_element: raise SelectorSyntaxError( - 'Got pseudo-element ::%s inside function' - % (pseudo_element, )) + "Got pseudo-element ::%s inside function" % (pseudo_element,) + ) stream.skip_whitespace() next = stream.next() - if next in (('EOF', None), ('DELIM', ',')): + if next in (("EOF", None), ("DELIM", ",")): stream.next() stream.skip_whitespace() arguments.append(result) - elif next == ('DELIM', ')'): + elif next == ("DELIM", ")"): arguments.append(result) break else: - raise SelectorSyntaxError( - "Expected an argument, got %s" % (next,)) + raise SelectorSyntaxError("Expected an argument, got %s" % (next,)) return arguments diff --git a/cssselect/xpath.py b/cssselect/xpath.py index f8930b1..13bc590 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -55,9 +55,9 @@ def __str__(self): def __repr__(self): return '%s[%s]' % (self.__class__.__name__, self) - def add_condition(self, condition, conjuction='and'): + def add_condition(self, condition, conjuction="and"): if self.condition: - self.condition = '(%s) %s (%s)' % (self.condition, conjuction, condition) + self.condition = "(%s) %s (%s)" % (self.condition, conjuction, condition) else: self.condition = condition return self @@ -83,9 +83,7 @@ def join(self, combiner, other, closing_combiner=None): if other.path != '*/': path += other.path self.path = path - self.element = ( - other.element + closing_combiner if closing_combiner else other.element - ) + self.element = other.element + closing_combiner if closing_combiner else other.element self.condition = other.condition return self @@ -295,7 +293,7 @@ def xpath_matching(self, matching): for e in exprs: e.add_name_test() if e.condition: - xpath.add_condition(e.condition, 'or') + xpath.add_condition(e.condition, "or") return xpath def xpath_function(self, function): @@ -405,14 +403,8 @@ def xpath_relation_child_combinator(self, left, right): def xpath_relation_direct_adjacent_combinator(self, left, right): """right is a sibling immediately after left; select left""" - left_copy = copy.copy(left) - xpath = left.join("/following-sibling::", right) - xpath.add_name_test() - xpath.add_condition("position() = 1") - - xpath = xpath.join("/preceding-sibling::", left_copy) - xpath.add_name_test() - return xpath.add_condition("position() = 1") + xpath = left.add_condition("following-sibling::{}[position() = 1]".format(right.element)) + return xpath def xpath_relation_indirect_adjacent_combinator(self, left, right): """right is a sibling after left, immediately or not; select left""" diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index dd099c2..e3e4761 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -516,8 +516,7 @@ def xpath(css): assert xpath("e:has(~ f)") == "e[following-sibling::f]" assert ( xpath("e:has(+ f)") - == "e/following-sibling::*[(name() = 'f') and (position() = 1)]" - "/preceding-sibling::*[(name() = 'e') and (position() = 1)]" + == "e[following-sibling::f[position() = 1]]" ) assert xpath('e f') == ( "e/descendant-or-self::*/f") From 72bd7762df6961f39cd8a18939cbc61b97b71a84 Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Fri, 23 Jul 2021 10:47:23 -0300 Subject: [PATCH 08/11] Revert xpath translation change to make it consistent --- cssselect/xpath.py | 4 +++- tests/test_cssselect.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/cssselect/xpath.py b/cssselect/xpath.py index 13bc590..d7a2203 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -403,7 +403,9 @@ def xpath_relation_child_combinator(self, left, right): def xpath_relation_direct_adjacent_combinator(self, left, right): """right is a sibling immediately after left; select left""" - xpath = left.add_condition("following-sibling::{}[position() = 1]".format(right.element)) + xpath = left.add_condition( + "following-sibling::*[(name() = '{}') and (position() = 1)]".format(right.element) + ) return xpath def xpath_relation_indirect_adjacent_combinator(self, left, right): diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index e3e4761..5552b78 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -516,7 +516,7 @@ def xpath(css): assert xpath("e:has(~ f)") == "e[following-sibling::f]" assert ( xpath("e:has(+ f)") - == "e[following-sibling::f[position() = 1]]" + == "e[following-sibling::*[(name() = 'f') and (position() = 1)]]" ) assert xpath('e f') == ( "e/descendant-or-self::*/f") From 41a0f7f3cd5abf3228fdf50218e6c3c4ed5db46b Mon Sep 17 00:00:00 2001 From: annbgn Date: Sun, 25 Jul 2021 21:43:44 +0300 Subject: [PATCH 09/11] add test, expand :has() to accept more complex arguments, remove useless ifs --- cssselect/parser.py | 38 ++++++++++++++++++++++---------------- cssselect/xpath.py | 9 +++------ tests/test_cssselect.py | 11 +++++++++++ 3 files changed, 36 insertions(+), 22 deletions(-) diff --git a/cssselect/parser.py b/cssselect/parser.py index 9d48dc7..43d55eb 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -255,8 +255,9 @@ class Relation(object): Represents selector:has(subselector) """ - def __init__(self, selector, subselector): + def __init__(self, selector, combinator, subselector): self.selector = selector + self.combinator = combinator self.subselector = subselector def __repr__(self): @@ -267,19 +268,20 @@ def __repr__(self): ) def canonical(self): - if not self.subselector: - subsel = "*" - else: + try: subsel = self.subselector[0].canonical() + except TypeError: + subsel = self.subselector.canonical() if len(subsel) > 1: subsel = subsel.lstrip("*") return "%s:has(%s)" % (self.selector.canonical(), subsel) def specificity(self): a1, b1, c1 = self.selector.specificity() - a2 = b2 = c2 = 0 - if self.subselector: + try: a2, b2, c2 = self.subselector[-1].specificity() + except TypeError: + a2, b2, c2 = self.subselector.specificity() return a1 + a2, b1 + b2, c1 + c2 @@ -600,8 +602,8 @@ def parse_simple_selector(stream, inside_negation=False): raise SelectorSyntaxError("Expected ')', got %s" % (next,)) result = Negation(result, argument) elif ident.lower() == "has": - arguments = parse_relative_selector(stream) - result = Relation(result, arguments) + combinator, arguments = parse_relative_selector(stream) + result = Relation(result, combinator, arguments) elif ident.lower() in ("matches", "is"): selectors = parse_simple_selector_arguments(stream) result = Matching(result, selectors) @@ -631,23 +633,27 @@ def parse_arguments(stream): def parse_relative_selector(stream): - arguments = [] stream.skip_whitespace() + subselector = "" next = stream.next() + if next in [("DELIM", "+"), ("DELIM", "-"), ("DELIM", ">"), ("DELIM", "~")]: - arguments.append(next) - elif next.type in ("IDENT", "STRING", "NUMBER"): - arguments.append(Element(element=next.value)) - while 1: + combinator = next stream.skip_whitespace() next = stream.next() - if next.type in ("IDENT", "STRING", "NUMBER"): - arguments.append(Element(element=next.value)) + else: + combinator = Token("DELIM", " ", pos=0) + + while 1: + if next.type in ("IDENT", "STRING", "NUMBER") or next in [("DELIM", "."), ("DELIM", "*")]: + subselector += next.value elif next == ('DELIM', ')'): - return arguments + result = parse(subselector) + return combinator, result[0] else: raise SelectorSyntaxError( "Expected an argument, got %s" % (next,)) + next = stream.next() def parse_simple_selector_arguments(stream): diff --git a/cssselect/xpath.py b/cssselect/xpath.py index d7a2203..82c03f1 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -275,12 +275,9 @@ def xpath_negation(self, negation): def xpath_relation(self, relation): xpath = self.xpath(relation.selector) - combinator, *subselector = relation.subselector - if not subselector: - combinator.value = " " - right = self.xpath(combinator) - else: - right = self.xpath(subselector[0]) + combinator = relation.combinator + subselector = relation.subselector + right = self.xpath(subselector.parsed_tree) method = getattr( self, "xpath_relation_%s_combinator" % self.combinator_mapping[combinator.value], diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index 5552b78..78f2558 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -145,6 +145,8 @@ def parse_many(first, *others): 'Hash[Element[div]#foobar]'] assert parse_many('div:not(div.foo)') == [ 'Negation[Element[div]:not(Class[Element[div].foo])]'] + assert parse_many('div:has(div.foo)') == [ + 'Relation[Element[div]:has(Selector[Class[Element[div].foo]])]'] assert parse_many('div:is(.foo, #bar)') == [ 'Matching[Element[div]:is(Class[Element[*].foo], Hash[Element[*]#bar])]'] assert parse_many(':is(:hover, :visited)') == [ @@ -272,6 +274,7 @@ def specificity(css): assert specificity(":has(*)") == (0, 0, 0) assert specificity(":has(foo)") == (0, 0, 1) + assert specificity(":has(.foo)") == (0, 1, 0) assert specificity(":has(> foo)") == (0, 0, 1) assert specificity(':is(.foo, #bar)') == (1, 0, 0) @@ -313,6 +316,7 @@ def css2css(css, res=None): css2css(':not(#foo)') css2css(":has(*)") css2css(":has(foo)") + css2css(':has(*.foo)', ':has(.foo)') css2css(':is(#bar, .foo)') css2css(':is(:focused, :visited)') css2css('foo:empty') @@ -400,6 +404,12 @@ def get_error(css): ) assert get_error('> div p') == ("Expected selector, got ' at 0>") + # Unsupported :has() with several arguments + assert get_error(':has(a, b)') == ( + "Expected an argument, got ") + assert get_error(':has()') == ( + "Expected selector, got ") + def test_translation(self): def xpath(css): return _unicode(GenericTranslator().css_to_xpath(css, prefix='')) @@ -889,6 +899,7 @@ def pcss(main, *selectors, **kwargs): assert pcss('ol :Not(li[class])') == [ 'first-li', 'second-li', 'li-div', 'fifth-li', 'sixth-li', 'seventh-li'] + assert pcss('link:has(*)') == [] assert pcss("ol:has(div)") == ["first-ol"] assert pcss(':is(#first-li, #second-li)') == [ 'first-li', 'second-li'] From 52bbdd11ae8c9fc63ce0755fd545f618ac0d49c5 Mon Sep 17 00:00:00 2001 From: annbgn Date: Tue, 3 Aug 2021 22:34:38 +0300 Subject: [PATCH 10/11] run black --- cssselect/parser.py | 6 ++---- cssselect/xpath.py | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/cssselect/parser.py b/cssselect/parser.py index 74268c8..f1ccf98 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -661,16 +661,14 @@ def parse_relative_selector(stream): while 1: if next.type in ("IDENT", "STRING", "NUMBER") or next in [("DELIM", "."), ("DELIM", "*")]: subselector += next.value - elif next == ('DELIM', ')'): + elif next == ("DELIM", ")"): result = parse(subselector) return combinator, result[0] else: - raise SelectorSyntaxError( - "Expected an argument, got %s" % (next,)) + raise SelectorSyntaxError("Expected an argument, got %s" % (next,)) next = stream.next() - def parse_simple_selector_arguments(stream): arguments = [] while 1: diff --git a/cssselect/xpath.py b/cssselect/xpath.py index 0485ce9..b9ff1d2 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -386,7 +386,7 @@ def xpath_direct_adjacent_combinator(self, left, right): def xpath_indirect_adjacent_combinator(self, left, right): """right is a sibling after left, immediately or not""" - return left.join('/following-sibling::', right) + return left.join("/following-sibling::", right) def xpath_relation_descendant_combinator(self, left, right): """right is a child, grand-child or further descendant of left; select left""" From 3c86499183f6c74ddad51ed88b44c63037a098f7 Mon Sep 17 00:00:00 2001 From: annbgn Date: Wed, 4 Aug 2021 00:11:00 +0300 Subject: [PATCH 11/11] support parsing complex selector in :not() --- cssselect/parser.py | 20 ++++++++++++++++---- cssselect/xpath.py | 34 ++++++++++++++++++++++++++++++++-- tests/test_cssselect.py | 25 +++++++++++++++++++------ 3 files changed, 67 insertions(+), 12 deletions(-) diff --git a/cssselect/parser.py b/cssselect/parser.py index f1ccf98..4fbeb2f 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -238,12 +238,22 @@ class Negation(object): Represents selector:not(subselector) """ - def __init__(self, selector, subselector): + def __init__(self, selector, subselector, combinator=None, subselector2=None): self.selector = selector self.subselector = subselector + self.combinator = combinator + self.subselector2 = subselector2 def __repr__(self): - return "%s[%r:not(%r)]" % (self.__class__.__name__, self.selector, self.subselector) + if self.combinator is None and self.subselector2 is None: + return "%s[%r:not(%r)]" % (self.__class__.__name__, self.selector, self.subselector) + return "%s[%r:not(%r %s %r)]" % ( + self.__class__.__name__, + self.selector, + self.subselector, + self.combinator.value, + self.subselector2.parsed_tree, + ) def canonical(self): subsel = self.subselector.canonical() @@ -614,9 +624,11 @@ def parse_simple_selector(stream, inside_negation=False): "Got pseudo-element ::%s inside :not() at %s" % (argument_pseudo_element, next.pos) ) + combinator = arguments = None if next != ("DELIM", ")"): - raise SelectorSyntaxError("Expected ')', got %s" % (next,)) - result = Negation(result, argument) + stream.skip_whitespace() + combinator, arguments = parse_relative_selector(stream) + result = Negation(result, argument, combinator, arguments) elif ident.lower() == "has": combinator, arguments = parse_relative_selector(stream) result = Relation(result, combinator, arguments) diff --git a/cssselect/xpath.py b/cssselect/xpath.py index b9ff1d2..47cb755 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -270,10 +270,19 @@ def xpath_combinedselector(self, combined): def xpath_negation(self, negation): xpath = self.xpath(negation.selector) sub_xpath = self.xpath(negation.subselector) - sub_xpath.add_name_test() - if sub_xpath.condition: + if negation.combinator is not None and negation.subselector2 is not None: + sub2_xpath = self.xpath(negation.subselector2.parsed_tree) + method = getattr( + self, + "xpath_negation_%s_combinator" + % self.combinator_mapping[negation.combinator.value], + ) + return method(xpath, sub_xpath, sub2_xpath) + elif sub_xpath.condition: + sub_xpath.add_name_test() return xpath.add_condition("not(%s)" % sub_xpath.condition) else: + sub_xpath.add_name_test() return xpath.add_condition("0") def xpath_relation(self, relation): @@ -407,6 +416,27 @@ def xpath_relation_indirect_adjacent_combinator(self, left, right): """right is a sibling after left, immediately or not; select left""" return left.join("[following-sibling::", right, closing_combiner="]") + def xpath_negation_descendant_combinator(self, xpath, left, right): + xpath.add_condition('not(name()="%s" and ancestor::*[name()="%s"])' % (right, left)) + return xpath + + def xpath_negation_child_combinator(self, xpath, left, right): + xpath.add_condition('not(name()="%s" and parent::*[name()="%s"])' % (right, left)) + return xpath + + def xpath_negation_direct_adjacent_combinator(self, xpath, left, right): + xpath.add_condition( + 'not(name()="%s" and following-sibling::*[position()=1 and name()="%s"])' + % (right, left) + ) + return xpath + + def xpath_negation_indirect_adjacent_combinator(self, xpath, left, right): + xpath.add_condition( + 'not(name()="%s" and following-sibling::*[name()="%s"])' % (right, left) + ) + return xpath + # Function: dispatch by function/pseudo-class name def xpath_nth_child_function(self, xpath, function, last=False, add_name_test=True): diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index ba64f6f..5d28c3a 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -145,6 +145,10 @@ def parse_many(first, *others): assert parse_many("a:lang(fr)") == ["Function[Element[a]:lang(['fr'])]"] assert parse_many('div:contains("foo")') == ["Function[Element[div]:contains(['foo'])]"] assert parse_many("div#foobar") == ["Hash[Element[div]#foobar]"] + assert parse_many(":not(a > b)") == ["Negation[Element[*]:not(Element[a] > Element[b])]"] + assert parse_many(":not(a + b)") == ["Negation[Element[*]:not(Element[a] + Element[b])]"] + assert parse_many(":not(a ~ b)") == ["Negation[Element[*]:not(Element[a] ~ Element[b])]"] + assert parse_many(":not(a b)") == ["Negation[Element[*]:not(Element[a] Element[b])]"] assert parse_many("div:not(div.foo)") == [ "Negation[Element[div]:not(Class[Element[div].foo])]" ] @@ -391,10 +395,8 @@ def get_error(css): assert get_error("> div p") == ("Expected selector, got ' at 0>") # Unsupported :has() with several arguments - assert get_error(':has(a, b)') == ( - "Expected an argument, got ") - assert get_error(':has()') == ( - "Expected selector, got ") + assert get_error(":has(a, b)") == ("Expected an argument, got ") + assert get_error(":has()") == ("Expected selector, got ") def test_translation(self): def xpath(css): @@ -470,12 +472,23 @@ def xpath(css): assert xpath("e:EmPTY") == ("e[not(*) and not(string-length())]") assert xpath("e:root") == ("e[not(parent::*)]") assert xpath("e:hover") == ("e[0]") # never matches + assert xpath("*:not(a > b)") == ( + '*[not(name()="b" and parent::*[name()="a"])]' + ) # select anything that is not b or doesn't have a parent a + assert xpath("*:not(a + b)") == ( + '*[not(name()="b" and following-sibling::*[position()=1 and name()="a"])]' + ) # select anything that is not b or doesn't have an immediate sibling a + assert xpath("*:not(a ~ b)") == ( + '*[not(name()="b" and following-sibling::*[name()="a"])]' + ) # select anything that is not b or doesn't have a sibling a + assert xpath("*:not(a b)") == ( + '*[not(name()="b" and ancestor::*[name()="a"])]' + ) # select anything that is not b or doesn't have an ancestor a assert xpath("e:has(> f)") == "e[./f]" assert xpath("e:has(f)") == "e[descendant::f]" assert xpath("e:has(~ f)") == "e[following-sibling::f]" assert ( - xpath("e:has(+ f)") - == "e[following-sibling::*[(name() = 'f') and (position() = 1)]]" + xpath("e:has(+ f)") == "e[following-sibling::*[(name() = 'f') and (position() = 1)]]" ) assert xpath('e:contains("foo")') == ("e[contains(., 'foo')]") assert xpath("e:ConTains(foo)") == ("e[contains(., 'foo')]")