From b4f779b261c60b28d40dbfae90851bbff7d2c52f Mon Sep 17 00:00:00 2001 From: Iwan Aucamp Date: Sat, 23 Apr 2022 21:53:52 +0200 Subject: [PATCH] fix: two issues with the N3 serializer This patch fixes two issues with the N3 serializer: - The N3 serializer incorrectly considered a subject as already serialized if it had been serialized inside a quoted graph. - The N3 serializer did not consider that the predicate of a triple can also be a graph. Other changes included in this patch: - Changed `test.utils.GraphHelper` to support nested/quoted graphs. - Moved the tests from `test/test_n3_formula.py` into `test/test_serializers/test_serializer_n3.py`. - Included positive syntax tests from the N3 test suite that are no larger than 1024 bytes and that do not use new N3 syntax in the round trip tests. This is mainly to check that there are no regressions after the changes made. Fixes: - https://github.com/RDFLib/rdflib/issues/1807 - https://github.com/RDFLib/rdflib/issues/1701 --- rdflib/plugins/serializers/n3.py | 33 +-- test/test_n3_formula.py | 57 ----- test/test_roundtrip.py | 236 ++++++++++++++++++-- test/test_serializers/test_serializer_n3.py | 150 +++++++++++++ test/utils/__init__.py | 147 ++++++++---- test/utils/test/test_testutils.py | 36 +++ 6 files changed, 507 insertions(+), 152 deletions(-) delete mode 100644 test/test_n3_formula.py create mode 100644 test/test_serializers/test_serializer_n3.py diff --git a/rdflib/plugins/serializers/n3.py b/rdflib/plugins/serializers/n3.py index f82a08a2a..1135ff9a3 100644 --- a/rdflib/plugins/serializers/n3.py +++ b/rdflib/plugins/serializers/n3.py @@ -23,36 +23,6 @@ def reset(self): super(N3Serializer, self).reset() self._stores = {} - def subjectDone(self, subject): - super(N3Serializer, self).subjectDone(subject) - if self.parent: - self.parent.subjectDone(subject) - - def isDone(self, subject): - return super(N3Serializer, self).isDone(subject) and ( - not self.parent or self.parent.isDone(subject) - ) - - def 
startDocument(self): - super(N3Serializer, self).startDocument() - # if not isinstance(self.store, N3Store): - # return - # - # all_list = [self.label(var) for var in - # self.store.get_universals(recurse=False)] - # all_list.sort() - # some_list = [self.label(var) for var in - # self.store.get_existentials(recurse=False)] - # some_list.sort() - # - # for var in all_list: - # self.write('\n'+self.indent()+'@forAll %s. '%var) - # for var in some_list: - # self.write('\n'+self.indent()+'@forSome %s. '%var) - # - # if (len(all_list) + len(some_list)) > 0: - # self.write('\n') - def endDocument(self): if not self.parent: super(N3Serializer, self).endDocument() @@ -68,6 +38,9 @@ def preprocessTriple(self, triple): if isinstance(triple[0], Graph): for t in triple[0]: self.preprocessTriple(t) + if isinstance(triple[1], Graph): + for t in triple[1]: + self.preprocessTriple(t) if isinstance(triple[2], Graph): for t in triple[2]: self.preprocessTriple(t) diff --git a/test/test_n3_formula.py b/test/test_n3_formula.py deleted file mode 100644 index 08cafd17a..000000000 --- a/test/test_n3_formula.py +++ /dev/null @@ -1,57 +0,0 @@ -import logging - -import pytest - -import rdflib -import rdflib.term - -logger = logging.getLogger(__name__) - - -@pytest.mark.xfail( - reason="""\ -N3 serializer randomly omits triple. See https://github.com/RDFLib/rdflib/issues/1807 -""", - raises=AssertionError, -) -def test(): - test_n3 = """@prefix rdf: . - @prefix rdfs: . - @prefix : . - {:a :b :c;a :foo} => {:a :d :c,?y} . - _:foo a rdfs:Class . 
- :a :d :c .""" - graph1 = rdflib.Graph() - graph1.parse(data=test_n3, format="n3") - - if logger.isEnabledFor(logging.DEBUG): - logging.debug("sorted(list(graph1)) = \n%s", sorted(list(graph1))) - - """ - >>> sorted(list(graph1)) - [ - ( - rdflib.term.BNode('fde0470d85a044b6780f0c6804b119063b1'), - rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'), - rdflib.term.URIRef('http://www.w3.org/2000/01/rdf-schema#Class') - ), - ( - rdflib.term.URIRef('http://test/a'), - rdflib.term.URIRef('http://test/d'), - rdflib.term.URIRef('http://test/c') - ), - ( - )>, - rdflib.term.URIRef('http://www.w3.org/2000/10/swap/log#implies'), - )> - ) - ] - """ - - graph2 = rdflib.Graph() - graph2.parse(data=graph1.serialize(format="n3"), format="n3") - assert ( - rdflib.term.URIRef('http://test/a'), - rdflib.term.URIRef('http://test/d'), - rdflib.term.URIRef('http://test/c'), - ) in graph2 diff --git a/test/test_roundtrip.py b/test/test_roundtrip.py index 98c303234..8c1f0987e 100644 --- a/test/test_roundtrip.py +++ b/test/test_roundtrip.py @@ -1,10 +1,10 @@ +import enum import logging import os.path -from json.decoder import JSONDecodeError from pathlib import Path from test.data import TEST_DATA_DIR from test.utils import GraphHelper -from typing import Callable, Collection, Iterable, List, Optional, Set, Tuple, Union +from typing import Callable, Iterable, List, Optional, Set, Tuple, Type, Union from xml.sax import SAXParseException import pytest @@ -12,7 +12,9 @@ import rdflib import rdflib.compare +from rdflib.graph import ConjunctiveGraph, Graph from rdflib.namespace import XSD +from rdflib.parser import create_input_source from rdflib.plugins.parsers.notation3 import BadSyntax from rdflib.util import guess_format @@ -184,6 +186,24 @@ """, raises=AssertionError, ), + ("n3", "data/suites/w3c/n3/N3Tests/cwm_syntax/decimal.n3"): pytest.mark.xfail( + raises=AssertionError, + reason="""double mismatch + - (rdflib.term.Literal('1.328435e+55', 
datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#double')), + + (rdflib.term.Literal('1.3284347025749857e+55', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#double')), + """, + ), + ("n3", "data/suites/w3c/n3/N3Tests/cwm_syntax/decimal-ref.n3"): pytest.mark.xfail( + raises=AssertionError, + reason="""double mismatch + - (rdflib.term.Literal('1.328435e+55', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#double')), + + (rdflib.term.Literal('1.32843470257e+55', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#double')), + """, + ), + ( + "n3", + "data/suites/w3c/n3/N3Tests/cwm_syntax/neg-single-quote.n3", + ): pytest.mark.xfail(raises=BadSyntax, reason="no support for single quotes"), } # This is for files which can only be represented properly in one format @@ -193,10 +213,10 @@ def collect_files( - directory: Path, exclude_names: Optional[Set[str]] = None + directory: Path, exclude_names: Optional[Set[str]] = None, pattern: str = "**/*" ) -> List[Tuple[Path, str]]: result = [] - for path in directory.glob("**/*"): + for path in directory.glob(pattern): if not path.is_file(): continue if exclude_names is not None and path.name in exclude_names: @@ -208,20 +228,40 @@ def collect_files( return result -def roundtrip(infmt: str, testfmt: str, source: Path) -> None: - g1 = rdflib.ConjunctiveGraph() +class Check(enum.Enum): + ISOMORPHIC = enum.auto() + SET_EQUALS = enum.auto() + SET_EQUALS_WITHOUT_BLANKS = enum.auto() - g1.parse(source, format=infmt) + +def roundtrip( + infmt: str, + testfmt: str, + source: Path, + graph_type: Type[Graph] = ConjunctiveGraph, + checks: Optional[Set[Check]] = None, + same_public_id: bool = False, +) -> None: + g1 = graph_type() + + if same_public_id: + input_source = create_input_source(source) + g1.parse(input_source, format=infmt) + else: + g1.parse(source, format=infmt) s = g1.serialize(format=testfmt) if logger.isEnabledFor(logging.DEBUG): - logger.debug("serailized = \n%s", s) + 
logger.debug("source = %s, serailized = \n%s", source, s) - g2 = rdflib.ConjunctiveGraph() - g2.parse(data=s, format=testfmt) + g2 = graph_type() + if same_public_id: + g2.parse(data=s, publicID=input_source.getPublicId(), format=testfmt) + else: + g2.parse(data=s, format=testfmt) - if testfmt == "hext": + if testfmt == "hext" and isinstance(g2, ConjunctiveGraph): # HexTuples always sets Literal("abc") -> Literal("abc", datatype=XSD.string) # and this prevents roundtripping since most other formats don't equate "" with # ""^^xsd:string, at least not in these tests @@ -239,7 +279,13 @@ def roundtrip(infmt: str, testfmt: str, source: Path) -> None: logger.debug("Items in G1 Only:\n%s", GraphHelper.format_graph_set(first)) logger.debug("Items in G2 Only:\n%s", GraphHelper.format_graph_set(second)) - GraphHelper.assert_isomorphic(g1, g2) + if checks is None or Check.ISOMORPHIC in checks: + GraphHelper.assert_isomorphic(g1, g2) + if checks is not None: + if Check.SET_EQUALS in checks: + GraphHelper.assert_sets_equals(g1, g2, exclude_blanks=False) + if Check.SET_EQUALS_WITHOUT_BLANKS in checks: + GraphHelper.assert_sets_equals(g1, g2, exclude_blanks=True) if logger.isEnabledFor(logging.DEBUG): logger.debug("OK") @@ -262,9 +308,15 @@ def get_formats() -> Set[str]: def make_cases( - files: Collection[Tuple[Path, str]], hext_okay: bool = False + files: Iterable[Tuple[Path, str]], + formats: Optional[Set[str]] = None, + hext_okay: bool = False, + checks: Optional[Set[Check]] = None, + graph_type: Type[Graph] = ConjunctiveGraph, + same_public_id: bool = False, ) -> Iterable[ParameterSet]: - formats = get_formats() + if formats is None: + formats = get_formats() for testfmt in formats: # if testfmt == "hext": # continue @@ -278,10 +330,24 @@ def make_cases( continue marks: List[Union[MarkDecorator, Mark]] = [] xfail = XFAILS.get((testfmt, f.name)) - if xfail: + if xfail is None: + xfail = XFAILS.get( + (testfmt, f"{f.relative_to(TEST_DATA_DIR.parent).as_posix()}") + ) + if 
xfail is not None: marks.append(xfail) id = f"roundtrip_{os.path.basename(f)}_{infmt}_{testfmt}" - values = (roundtrip, (infmt, testfmt, f)) + values = ( + lambda infmt, testfmt, f: roundtrip( + infmt, + testfmt, + f, + checks=checks, + graph_type=graph_type, + same_public_id=same_public_id, + ), + (infmt, testfmt, f), + ) logging.debug("values = %s", values) yield pytest.param(*values, marks=marks, id=id) @@ -305,6 +371,144 @@ def test_n3(checker: Callable[[str, str, Path], None], args: Tuple[str, str, Pat checker(*args) +N3_W3C_SUITE_DIR = Path(TEST_DATA_DIR) / "suites" / "w3c" / "n3" + +""" +List generated with: + +sparql --base '.' --query <(echo ' +PREFIX rdfs: +PREFIX rdft: +PREFIX mf: +PREFIX test: +SELECT DISTINCT ?file WHERE { + ?test a test:TestN3PositiveSyntax. + ?test mf:action ?file +} +') --data test/data/suites/w3c/n3/N3Tests/manifest-parser.ttl --results=TSV \ + | sed 1d \ + | sed -E 's,^.*(test/data/suites/.*)>$,\1,g' \ + | grep -v '/new_syntax/' \ + | xargs -I{} find {} -printf '%p:%s\n' \ + | gawk -F: '($2 <= 1024){ print $1 }' \ + | xargs egrep -c '[?]\S+' | sort \ + | sed -E -n 's|^test/data/suites/w3c/n3/(.*):0| (N3_W3C_SUITE_DIR / "\1", "n3"),|gp' +""" + +N3_W3C_SUITE_FILES = [ + (N3_W3C_SUITE_DIR / "N3Tests/cwm_andy/D-ref.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_i18n/hiragana.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_i18n/i18n.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_i18n/umlaut.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_includes/bnode-conclude-ref.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_includes/builtins.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_includes/concat-ref.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_includes/conjunction-ref.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_includes/foo.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_includes/list-in-ref.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_includes/t10a.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_includes/t1.n3", "n3"), + 
(N3_W3C_SUITE_DIR / "N3Tests/cwm_includes/t2.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_includes/t3.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_list/append-ref.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_list/bnode_in_list_in_list.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_list/builtin_generated_match-ref.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_list/construct.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_list/list-bug1-ref.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_list/list-bug2-ref.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_list/r1-ref.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_list/unify2-ref.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_list/unify3-ref.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_list/unify4-ref.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_list/unify5-ref.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_math/long.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_other/anon-prop.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_other/anonymous_loop.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_other/classes.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_other/contexts.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_other/daml-pref.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_other/equiv-syntax.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_other/filter-bnodes.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_other/invalid-ex.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_other/kb1.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_other/lists.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_other/lists-simple.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_other/reluri-1.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_other/t00-ref.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_other/t01-ref.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_other/underbarscope.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_reason/double-ref.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_reason/socrates-ref.n3", "n3"), + (N3_W3C_SUITE_DIR / 
"N3Tests/cwm_reason/t1.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_reason/t1-ref.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_reason/t2.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_reason/t2-ref.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_reason/t3.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_reason/t3-ref.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_reason/t4-ref.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_reason/t5-ref.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_reason/t6-ref.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_reason/t8-ref.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_reason/t9.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_reason/t9-ref.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_reason/timbl.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_string/endsWith-out.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_supports/simple.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_supports/simple-ref.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_syntax/a1.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_syntax/bad-preds-formula.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_syntax/bad-preds-literal.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_syntax/base.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_syntax/base-ref.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_syntax/BnodeAcrossFormulae.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_syntax/boolean.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_syntax/boolean-ref.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_syntax/colon-no-qname.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_syntax/decimal.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_syntax/decimal-ref.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_syntax/embedded-dot-in-qname.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_syntax/formula_bnode.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_syntax/formula-simple-1.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_syntax/formula-subject.n3", "n3"), + (N3_W3C_SUITE_DIR / 
"N3Tests/cwm_syntax/graph-as-object.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_syntax/neg-formula-predicate.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_syntax/neg-literal-predicate.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_syntax/neg-single-quote.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_syntax/nested.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_syntax/one-bnode.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_syntax/qvars3.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_syntax/sep-term.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_syntax/sib.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_syntax/space-in-uri-ref.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_syntax/this-rules-ref.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_syntax/trailing-semicolon-ref.nt", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_syntax/zero-length-lname.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_syntax/zero-predicates.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_unify/reflexive-ref.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_unify/unify1-ref.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_unify/unify2.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/cwm_unify/unify2-ref.n3", "n3"), + (N3_W3C_SUITE_DIR / "N3Tests/extra/good_prefix.n3", "n3"), +] + + +@pytest.mark.parametrize( + "checker, args", + make_cases( + N3_W3C_SUITE_FILES, + formats={"n3"}, + # NOTE: Isomomorphic check does not work on Quoted Graphs + checks={Check.SET_EQUALS_WITHOUT_BLANKS}, + graph_type=Graph, + same_public_id=True, + ), +) +def test_n3_suite( + checker: Callable[[str, str, Path], None], args: Tuple[str, str, Path] +): + checker(*args) + + EXTRA_FILES = [ (TEST_DATA_DIR / "variants" / "special_chars.nt", "ntriples"), (TEST_DATA_DIR / "variants" / "xml_literal.rdf", "xml"), diff --git a/test/test_serializers/test_serializer_n3.py b/test/test_serializers/test_serializer_n3.py new file mode 100644 index 000000000..4a008105a --- /dev/null +++ b/test/test_serializers/test_serializer_n3.py @@ -0,0 +1,150 
@@ +import logging +from test.utils import GraphHelper + +import rdflib +import rdflib.term +from rdflib import Graph +from rdflib.graph import QuotedGraph +from rdflib.namespace import Namespace +from rdflib.plugins.parsers.notation3 import LOG_implies_URI +from rdflib.term import BNode, URIRef + +logger = logging.getLogger(__name__) + + +def test_implies(): + test_n3 = """@prefix rdf: . + @prefix rdfs: . + @prefix : . + {:a :b :c;a :foo} => {:a :d :c,?y} . + _:foo a rdfs:Class . + :a :d :c .""" + graph1 = rdflib.Graph() + graph1.parse(data=test_n3, format="n3") + + if logger.isEnabledFor(logging.DEBUG): + logging.debug("sorted(list(graph1)) = \n%s", sorted(list(graph1))) + + """ + >>> sorted(list(graph1)) + [ + ( + rdflib.term.BNode('fde0470d85a044b6780f0c6804b119063b1'), + rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'), + rdflib.term.URIRef('http://www.w3.org/2000/01/rdf-schema#Class') + ), + ( + rdflib.term.URIRef('http://test/a'), + rdflib.term.URIRef('http://test/d'), + rdflib.term.URIRef('http://test/c') + ), + ( + )>, + rdflib.term.URIRef('http://www.w3.org/2000/10/swap/log#implies'), + )> + ) + ] + """ + + graph2 = rdflib.Graph() + graph2.parse(data=graph1.serialize(format="n3"), format="n3") + assert ( + rdflib.term.URIRef('http://test/a'), + rdflib.term.URIRef('http://test/d'), + rdflib.term.URIRef('http://test/c'), + ) in graph2 + + +EG = Namespace("http://example.com/") + +LOG_implies = URIRef(LOG_implies_URI) + + +def test_merging() -> None: + data_a = """ + @prefix : . + :a :b :c. + """ + data_b = """ + @prefix : . + {:a :b :c} => {:d :e :f}. 
+ """ + graph = Graph() + assert (EG.a, EG.b, EG.c) not in graph + + graph.parse(data=data_a, format="n3") + assert (EG.a, EG.b, EG.c) in graph + + graph.parse(data=data_b, format="n3") + assert (EG.a, EG.b, EG.c) in graph + assert len(set(graph.triples((None, LOG_implies, None)))) == 1 + + data_s = graph.serialize(format="n3") + logging.debug("data_s = %s", data_s) + + graph = Graph() + graph.parse(data=data_s, format="n3") + quad_set = GraphHelper.triple_set(graph) + + assert (EG.a, EG.b, EG.c) in graph + assert len(set(graph.triples((None, LOG_implies, None)))) == 1 + + logging.debug("quad_set = %s", quad_set) + + +def test_single_simple_triple() -> None: + data_a = """ + @prefix : . + :a :b :c. + """ + graph = Graph() + assert (EG.a, EG.b, EG.c) not in graph + + graph.parse(data=data_a, format="n3") + assert (EG.a, EG.b, EG.c) in graph + + data_s = graph.serialize(format="n3") + logging.debug("data_s = %s", data_s) + + graph = Graph() + graph.parse(data=data_s, format="n3") + quad_set = GraphHelper.triple_set(graph) + + assert (EG.a, EG.b, EG.c) in graph + + logging.debug("quad_set = %s", quad_set) + + +def test_implies_nothing() -> None: + triple_a = (EG.a, EG.b, EG.c) + graph = Graph() + qgraph_a = QuotedGraph(graph.store, BNode()) + qgraph_a.add(triple_a) + qgraph_b = QuotedGraph(graph.store, BNode()) + graph.add((qgraph_a, LOG_implies, qgraph_b)) + graph.add(triple_a) + + data_s = graph.serialize(format="n3") + logging.debug("data_s = %s", data_s) + + rgraph = Graph() + rgraph.parse(data=data_s, format="n3") + graph_qs, qgraph_a_qs, qgraph_b_qs = GraphHelper.triple_sets( + (rgraph, qgraph_a, qgraph_b) + ) + logging.debug("graph_qs = %s", graph_qs) + logging.debug("qgraph_a_qs = %s", qgraph_a_qs) + logging.debug("qgraph_b_qs = %s", qgraph_b_qs) + + assert len(graph_qs) == 2 + assert len(qgraph_a_qs) == 1 + assert len(qgraph_b_qs) == 0 + + triple_b = (qgraph_a_qs, LOG_implies, qgraph_b_qs) + + logging.debug("triple_a = %s", triple_a) + logging.debug("triple_b 
= %s", triple_b) + + assert triple_a in graph_qs + assert triple_a in rgraph + assert triple_b in graph_qs diff --git a/test/utils/__init__.py b/test/utils/__init__.py index b14837fb8..d98461ef9 100644 --- a/test/utils/__init__.py +++ b/test/utils/__init__.py @@ -7,11 +7,9 @@ from __future__ import print_function -import datetime import email.message -import os +import pprint import random -import sys import unittest from contextlib import AbstractContextManager, contextmanager from http.server import BaseHTTPRequestHandler, HTTPServer, SimpleHTTPRequestHandler @@ -25,6 +23,7 @@ Callable, Collection, Dict, + FrozenSet, Generator, Iterable, Iterator, @@ -43,7 +42,6 @@ from urllib.parse import ParseResult, parse_qs, unquote, urlparse from urllib.request import urlopen -import isodate import pytest from _pytest.mark.structures import Mark, MarkDecorator, ParameterSet from nturl2path import url2pathname as nt_url2pathname @@ -76,10 +74,39 @@ def get_unique_plugin_names(type: Type[PluginT]) -> Set[str]: return result -IdentifierTriple = Tuple[Identifier, Identifier, Identifier] -IdentifierTripleSet = Set[IdentifierTriple] -IdentifierQuad = Tuple[Identifier, Identifier, Identifier, Identifier] -IdentifierQuadSet = Set[IdentifierQuad] +if TYPE_CHECKING: + import typing_extensions as te + + +def get_random_ip(parts: List[str] = None) -> str: + if parts is None: + parts = ["127"] + for _ in range(4 - len(parts)): + parts.append(f"{random.randint(0, 255)}") + return ".".join(parts) + + +@contextmanager +def ctx_http_server( + handler: Type[BaseHTTPRequestHandler], host: str = "127.0.0.1" +) -> Iterator[HTTPServer]: + server = HTTPServer((host, 0), handler) + server_thread = Thread(target=server.serve_forever) + server_thread.daemon = True + server_thread.start() + yield server + server.shutdown() + server.socket.close() + server_thread.join() + + +GHNode = Union[Identifier, FrozenSet[Tuple[Identifier, Identifier, Identifier]]] +GHTriple = Tuple[GHNode, GHNode, GHNode] 
+GHTripleSet = Set[GHTriple] +GHTripleFrozenSet = FrozenSet[GHTriple] +GHQuad = Tuple[GHNode, GHNode, GHNode, Identifier] +GHQuadSet = Set[GHQuad] +GHQuadFrozenSet = FrozenSet[GHQuad] class GraphHelper: @@ -88,47 +115,50 @@ class GraphHelper: """ @classmethod - def identifier(self, node: Node) -> Identifier: + def node(self, node: Node, exclude_blanks: bool = False) -> GHNode: """ Return the identifier of the provided node. """ if isinstance(node, Graph): - return node.identifier - else: - return cast(Identifier, node) + xset = cast(GHNode, self.triple_or_quad_set(node, exclude_blanks)) + return xset + + return cast(Identifier, node) @classmethod - def identifiers(cls, nodes: Tuple[Node, ...]) -> Tuple[Identifier, ...]: + def nodes( + cls, nodes: Tuple[Node, ...], exclude_blanks: bool = False + ) -> Tuple[GHNode, ...]: """ Return the identifiers of the provided nodes. """ result = [] for node in nodes: - result.append(cls.identifier(node)) + result.append(cls.node(node, exclude_blanks)) return tuple(result) @classmethod def triple_set( cls, graph: Graph, exclude_blanks: bool = False - ) -> IdentifierTripleSet: - result = set() + ) -> GHTripleFrozenSet: + result: GHTripleSet = set() for sn, pn, on in graph.triples((None, None, None)): - s, p, o = cls.identifiers((sn, pn, on)) + s, p, o = cls.nodes((sn, pn, on), exclude_blanks) if exclude_blanks and ( isinstance(s, BNode) or isinstance(p, BNode) or isinstance(o, BNode) ): continue result.add((s, p, o)) - return result + return frozenset(result) @classmethod def triple_sets( cls, graphs: Iterable[Graph], exclude_blanks: bool = False - ) -> List[IdentifierTripleSet]: + ) -> List[GHTripleFrozenSet]: """ Extracts the set of all triples from the supplied Graph. 
""" - result: List[IdentifierTripleSet] = [] + result: List[GHTripleFrozenSet] = [] for graph in graphs: result.append(cls.triple_set(graph, exclude_blanks)) return result @@ -136,27 +166,26 @@ def triple_sets( @classmethod def quad_set( cls, graph: ConjunctiveGraph, exclude_blanks: bool = False - ) -> IdentifierQuadSet: + ) -> GHQuadFrozenSet: """ Extracts the set of all quads from the supplied ConjunctiveGraph. """ - result = set() + result: GHQuadSet = set() for sn, pn, on, gn in graph.quads((None, None, None, None)): - s, p, o, g = cls.identifiers((sn, pn, on, gn)) + assert isinstance(gn, Graph) + s, p, o = cls.nodes((sn, pn, on), exclude_blanks) if exclude_blanks and ( - isinstance(s, BNode) - or isinstance(p, BNode) - or isinstance(o, BNode) - or isinstance(g, BNode) + isinstance(s, BNode) or isinstance(p, BNode) or isinstance(o, BNode) ): continue - result.add((s, p, o, g)) - return result + quad: GHQuad = (s, p, o, gn.identifier) + result.add(quad) + return frozenset(result) @classmethod def triple_or_quad_set( cls, graph: Graph, exclude_blanks: bool = False - ) -> Union[IdentifierQuadSet, IdentifierTripleSet]: + ) -> Union[GHQuadFrozenSet, GHTripleFrozenSet]: """ Extracts quad or triple sets depending on whether or not the graph is ConjunctiveGraph or a normal Graph. @@ -172,50 +201,70 @@ def assert_triple_sets_equals( """ Asserts that the triple sets in the two graphs are equal. 
""" - lhs_set = cls.triple_set(lhs, exclude_blanks) - rhs_set = cls.triple_set(rhs, exclude_blanks) + lhs_set = cls.triple_set(lhs, exclude_blanks) if isinstance(lhs, Graph) else lhs + rhs_set = cls.triple_set(rhs, exclude_blanks) if isinstance(rhs, Graph) else rhs assert lhs_set == rhs_set @classmethod def assert_quad_sets_equals( - cls, lhs: ConjunctiveGraph, rhs: ConjunctiveGraph, exclude_blanks: bool = False + cls, + lhs: Union[ConjunctiveGraph, GHQuadSet], + rhs: Union[ConjunctiveGraph, GHQuadSet], + exclude_blanks: bool = False, ) -> None: """ Asserts that the quads sets in the two graphs are equal. """ - lhs_set = cls.quad_set(lhs, exclude_blanks) - rhs_set = cls.quad_set(rhs, exclude_blanks) + lhs_set = cls.quad_set(lhs, exclude_blanks) if isinstance(lhs, Graph) else lhs + rhs_set = cls.quad_set(rhs, exclude_blanks) if isinstance(rhs, Graph) else rhs assert lhs_set == rhs_set @classmethod def assert_sets_equals( - cls, lhs: Graph, rhs: Graph, exclude_blanks: bool = False + cls, + lhs: Union[Graph, GHTripleSet, GHQuadSet], + rhs: Union[Graph, GHTripleSet, GHQuadSet], + exclude_blanks: bool = False, ) -> None: """ Asserts that that ther quad or triple sets from the two graphs are equal. 
""" - lhs_set = cls.triple_or_quad_set(lhs, exclude_blanks) - rhs_set = cls.triple_or_quad_set(rhs, exclude_blanks) + lhs_set = ( + cls.triple_or_quad_set(lhs, exclude_blanks) + if isinstance(lhs, Graph) + else lhs + ) + rhs_set = ( + cls.triple_or_quad_set(rhs, exclude_blanks) + if isinstance(rhs, Graph) + else rhs + ) assert lhs_set == rhs_set @classmethod def format_set( cls, - item_set: Union[IdentifierQuadSet, IdentifierTripleSet], - prefix: str = " ", + item_set: Union[GHQuadSet, GHQuadFrozenSet, GHTripleSet, GHTripleFrozenSet], + indent: int = 1, sort: bool = False, ) -> str: - items = [] - use_item_set = sorted(item_set) if sort else item_set - for item in use_item_set: - items.append(f"{prefix}{item}") - return "\n".join(items) + def _key(node: Union[GHTriple, GHQuad, GHNode]): + val: Any = node + if isinstance(node, tuple): + val = tuple(_key(item) for item in node) + if isinstance(node, frozenset): + for triple in node: + nodes = cls.nodes(triple) + val = tuple(_key(item) for item in nodes) + key = (f"{type(node)}", val) + return key + + use_item_set = sorted(item_set, key=_key) if sort else item_set + return pprint.pformat(use_item_set, indent) @classmethod - def format_graph_set( - cls, graph: Graph, prefix: str = " ", sort: bool = False - ) -> str: - return cls.format_set(cls.triple_or_quad_set(graph), prefix, sort) + def format_graph_set(cls, graph: Graph, indent: int = 1, sort: bool = False) -> str: + return cls.format_set(cls.triple_or_quad_set(graph), indent, sort) @classmethod def assert_isomorphic( diff --git a/test/utils/test/test_testutils.py b/test/utils/test/test_testutils.py index b84ccea89..2485a404c 100644 --- a/test/utils/test/test_testutils.py +++ b/test/utils/test/test_testutils.py @@ -216,6 +216,42 @@ def rhs_format(self) -> str: } """, ), + SetsEqualTestCase( + equal=True, + format="n3", + ignore_blanks=False, + lhs=""" + { } {}. + """, + rhs=""" + @prefix eg: . + { eg:ss0 eg:sp0 eg:so0 } eg:p0 {}. 
+ """, + ), + SetsEqualTestCase( + equal=True, + format="n3", + ignore_blanks=False, + lhs=""" + { } {}. + """, + rhs=""" + @prefix eg: . + { eg:ss0 eg:sp0 eg:so0 } eg:p0 {}. + """, + ), + SetsEqualTestCase( + equal=True, + format="n3", + ignore_blanks=False, + lhs=""" + { { } } {}. + """, + rhs=""" + @prefix eg: . + { { eg:sss0 eg:ssp0 eg:sso0 } eg:sp0 eg:so0 } eg:p0 {}. + """, + ), ], ) def test_assert_sets_equal(test_case: SetsEqualTestCase):