forked from RDFLib/rdflib
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix: two issues with the N3 serializer
This patch fixes two issues with the N3 serializer: - The N3 serializer incorrectly considered a subject as already serialized if it has been serialized inside a quoted graph. - The N3 serializer does not consider that the predicate of a triple can also be a graph. Other changes included in this patch: - Added the N3 test suite from https://github.com/w3c/N3/tree/master/tests - Added `test/data/fetcher.py` which fetches remote test data. - Changed `test.testutils.GraphHelper` to support nested graphs. Fixes: - RDFLib#1807 - RDFLib#1701 Related: - RDFLib#1840
- Loading branch information
Showing
1,837 changed files
with
624,378 additions
and
166 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,29 @@ | ||
# Consistent Test Data | ||
# Test Data | ||
|
||
This directory contains consistent graphs that can be used inside tests, the | ||
graphs in this directory should not change. | ||
This directory contains data for use inside tests. Ideally the data in this
directory should be constant and should not change. In general, widely known,
non-original data is preferred to original data, since well-known data has
well-known attributes and qualities that make it easier to reason about.
|
||
|
||
## File origins | ||
|
||
- `rdfs.ttl`: `http://www.w3.org/2000/01/rdf-schema#` | ||
|
||
## Fetcher | ||
|
||
Files that originate from the internet should be downloaded using `fetcher.py` | ||
so we can easily verify the integrity of the files by re-running `fetcher.py`. | ||
|
||
```bash | ||
# run in repo root | ||
|
||
# fetch everything | ||
.venv/bin/python3 test/data/fetcher.py | ||
|
||
# only fetch single file | ||
.venv/bin/python3 test/data/fetcher.py test/data/rdfs.ttl | ||
|
||
# only fetch files below path: | ||
.venv/bin/python3 test/data/fetcher.py test/data/suites | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,308 @@ | ||
import argparse | ||
import enum | ||
import logging | ||
import os | ||
import random | ||
import re | ||
import shutil | ||
import string | ||
import sys | ||
import tarfile | ||
from contextlib import ExitStack, contextmanager | ||
from dataclasses import dataclass, field | ||
from pathlib import Path | ||
from tarfile import TarFile, TarInfo | ||
from tempfile import TemporaryDirectory, mkdtemp | ||
from typing import IO, Generator, List, Pattern, Union | ||
from urllib.request import Request, urlopen | ||
from zipfile import ZipFile, ZipInfo | ||
|
||
# Directory containing this script; all fetched test data lives beneath it.
DATA_PATH = Path(__file__).parent
|
||
|
||
@dataclass
class Resource:
    """Base class for a remote resource that can be mirrored locally.

    Subclasses implement :meth:`fetch` to download ``remote`` and place the
    result at ``local_path``.
    """

    # URL string, or a fully prepared Request (e.g. with an Accept header).
    remote: Union[str, Request]
    # Filesystem destination for the fetched resource.
    local_path: Path

    def fetch(self, tmp_path: Path) -> None:
        """Download the resource; must be overridden by subclasses."""
        raise NotImplementedError()
|
||
|
||
@dataclass
class FileResource(Resource):
    """A single remote file mirrored verbatim to ``local_path``."""

    def fetch(self, tmp_path: Path) -> None:
        """Download ``remote`` and write its bytes to ``local_path``.

        Any existing file at ``local_path`` is removed first so a partial
        download never mixes with stale content.  ``tmp_path`` is unused here;
        it is part of the ``Resource.fetch`` interface.
        """
        if self.local_path.exists():
            # Fixed log message: it previously said only "info %s", which gave
            # no hint that the existing file was being deleted.
            logging.debug("removing existing %s", self.local_path)
            os.remove(self.local_path)

        with ExitStack() as xstack:
            # Normalize to a Request so headers-bearing remotes work the same
            # as plain URL strings.
            request = (
                self.remote
                if isinstance(self.remote, Request)
                else Request(self.remote)
            )
            response = urlopen(request)
            remote_io: IO[bytes] = xstack.enter_context(response)

            local_io = xstack.enter_context(self.local_path.open("wb+"))
            shutil.copyfileobj(remote_io, local_io)

        logging.info("Downloaded %s to %s", request.full_url, self.local_path)
|
||
|
||
class ArchiveType(enum.Enum):
    """Supported archive container formats; values double as file suffixes."""

    ZIP = "zip"
    TAR_GZ = "tar.gz"
|
||
|
||
@dataclass
class ArchiveResource(Resource):
    """A remote archive (zip or tar.gz) whose matching members are extracted
    under ``local_path``.

    ``pattern`` is matched against each member's path inside the archive.  If
    the pattern has a capture group, group 1 becomes the destination path
    relative to ``local_path``; otherwise the member's full path is used.
    """

    # Archive container format; selects ZipFile vs tarfile handling.
    type: ArchiveType
    # Compiled regex applied to each member path inside the archive.
    pattern: Pattern[str]

    def fetch(self, tmp_path: Path) -> None:
        """Download the archive into ``tmp_path`` and extract matching members.

        Any pre-existing ``local_path`` tree is removed first so the result
        mirrors exactly the archive's matched contents.
        """
        if self.local_path.exists():
            logging.debug("removing existing %s", self.local_path)
            shutil.rmtree(self.local_path)
        with ExitStack() as xstack:
            request = (
                self.remote
                if isinstance(self.remote, Request)
                else Request(self.remote)
            )
            response = urlopen(request)
            remote_io: IO[bytes] = xstack.enter_context(response)
            # Random temp name avoids collisions between concurrent fetches.
            name = (
                "".join(
                    random.choices(
                        string.ascii_uppercase + string.digits + string.ascii_lowercase,
                        k=10,
                    )
                )
                + f".{self.type.value}"
            )
            tmp_file = tmp_path / name
            logging.info("fetching %s to temp file %s", self.remote, tmp_file)
            with tmp_file.open("wb+") as tmp_io:
                shutil.copyfileobj(remote_io, tmp_io)

            archive_file: Union[ZipFile, TarFile]
            if self.type is ArchiveType.ZIP:
                archive_file = xstack.enter_context(ZipFile(tmp_file))
            elif self.type is ArchiveType.TAR_GZ:
                archive_file = xstack.enter_context(tarfile.open(tmp_file, mode="r:gz"))
            else:
                raise ValueError(f"invalid type {self.type}")

            for member_info in self._member_list(archive_file):
                member_filename = self._member_filename(member_info)
                if self._member_isdir(member_info):
                    logging.debug("Ignoring directory %s", member_filename)
                    continue

                match = self.pattern.match(member_filename)
                if match is None:
                    logging.debug("Ignoring unmatched %s", member_filename)
                    continue
                # BUGFIX: dest_filename was previously assigned only when the
                # pattern had capture groups, leaving it unbound (NameError)
                # for the first match of a group-less pattern and *stale from
                # the previous iteration* afterwards.  Fall back to the
                # member's own path when there is no capture group.
                groups = match.groups()
                dest_filename = groups[0] if groups else member_filename

                member_io: IO[bytes]
                with self._member_io(archive_file, member_info) as member_io:
                    local_file = self.local_path / dest_filename
                    if not local_file.parent.exists():
                        local_file.parent.mkdir(parents=True)
                    logging.debug("writing %s to %s", member_filename, local_file)
                    local_file.write_bytes(member_io.read())

            logging.info(
                "Downloaded %s and extracted files matching %s to %s",
                request.full_url,
                self.pattern,
                self.local_path,
            )

    @classmethod
    def _member_list(
        cls, archive: Union[ZipFile, TarFile]
    ) -> Union[List[ZipInfo], List[TarInfo]]:
        """Return the archive's member entries (format-specific info objects)."""
        if isinstance(archive, ZipFile):
            return archive.infolist()
        return archive.getmembers()

    @classmethod
    def _member_isdir(cls, member_info: Union[ZipInfo, TarInfo]) -> bool:
        """True if the member entry denotes a directory."""
        if isinstance(member_info, ZipInfo):
            return member_info.is_dir()
        return member_info.isdir()

    @classmethod
    def _member_filename(cls, member_info: Union[ZipInfo, TarInfo]) -> str:
        """Return the member's path inside the archive."""
        if isinstance(member_info, ZipInfo):
            return member_info.filename
        return member_info.name

    @classmethod
    @contextmanager
    def _member_io(
        cls, archive: Union[ZipFile, TarFile], member_info: Union[ZipInfo, TarInfo]
    ) -> Generator[IO[bytes], None, None]:
        """Yield a binary reader for one member, regardless of archive type."""
        if isinstance(archive, ZipFile):
            assert isinstance(member_info, ZipInfo)
            with archive.open(member_info) as member_io:
                yield member_io
        else:
            assert isinstance(member_info, TarInfo)
            opt_io = archive.extractfile(member_info)
            # extractfile returns None only for non-file members, which were
            # already skipped by _member_isdir.
            assert opt_io is not None
            yield opt_io
|
||
|
||
# Registry of all test-data resources that this script knows how to
# (re-)download.  Remote archives are pinned (by commit or versioned URL) so
# repeated runs produce identical local data.
RESOURCES: List[Resource] = [
    # W3C N3 test suite, pinned to a specific commit for reproducibility.
    ArchiveResource(
        remote="https://github.com/w3c/N3/archive/c44d123c5958ca04117e28ca3769e2c0820f72e6.zip",
        local_path=(DATA_PATH / "suites" / "w3c" / "n3"),
        type=ArchiveType.ZIP,
        # Strip the leading "<repo>-<sha>/tests/" prefix from member paths.
        pattern=re.compile(r"^[^\/]+[\/]tests[\/](.+)$"),
    ),
    # W3C Turtle test suite.
    ArchiveResource(
        remote="https://www.w3.org/2013/TurtleTests/TESTS.tar.gz",
        local_path=(DATA_PATH / "suites" / "w3c" / "turtle"),
        type=ArchiveType.TAR_GZ,
        # Strip the single top-level directory from member paths.
        pattern=re.compile(r"^[^\/]+[\/](.+)$"),
    ),
    # W3C N-Quads test suite.
    ArchiveResource(
        remote="https://www.w3.org/2013/N-QuadsTests/TESTS.tar.gz",
        local_path=(DATA_PATH / "suites" / "w3c" / "nquads"),
        type=ArchiveType.TAR_GZ,
        pattern=re.compile(r"^(.+)$"),
    ),
    # W3C N-Triples test suite.
    ArchiveResource(
        remote="https://www.w3.org/2013/N-TriplesTests/TESTS.tar.gz",
        local_path=(DATA_PATH / "suites" / "w3c" / "ntriples"),
        type=ArchiveType.TAR_GZ,
        pattern=re.compile(r"^(.+)$"),
    ),
    # W3C TriG test suite.
    ArchiveResource(
        remote="https://www.w3.org/2013/TrigTests/TESTS.tar.gz",
        local_path=(DATA_PATH / "suites" / "w3c" / "trig"),
        type=ArchiveType.TAR_GZ,
        pattern=re.compile(r"^(.+)$"),
    ),
    # NOTE: Commented out as these files contains local modifications.
    # ArchiveResource(
    #     remote="https://www.w3.org/2013/RDFXMLTests/TESTS.zip",
    #     local_path=(DATA_PATH / "suites" / "w3c" / "rdfxml"),
    #     type=ArchiveType.ZIP,
    #     pattern=re.compile(r"^(.+)$"),
    # ),
    # NOTE: Commented out as this contains local modifications.
    # ArchiveResource(
    #     remote="https://www.w3.org/2009/sparql/docs/tests/sparql11-test-suite-20121023.tar.gz",
    #     local_path=(DATA_PATH / "suites" / "DAWG" / "data-sparql11"),
    #     type=ArchiveType.TAR_GZ,
    #     pattern=re.compile(r"^[^\/]+[\/](.+)$"),
    # ),
    # RDFS schema; the Accept header requests the Turtle serialization.
    FileResource(
        remote=Request(
            "http://www.w3.org/2000/01/rdf-schema#", headers={"Accept": "text/turtle"}
        ),
        local_path=(DATA_PATH / "rdfs.ttl"),
    ),
]
|
||
|
||
@dataclass
class Application:
    """Command-line front end for fetching the test-data resources."""

    # Each Application owns its own parser instance (built lazily).
    parser: argparse.ArgumentParser = field(
        default_factory=lambda: argparse.ArgumentParser(add_help=True)
    )

    def __post_init__(self) -> None:
        """Register CLI options and bind :meth:`handle` as the default handler."""
        self.parser.add_argument(
            "-v",
            "--verbose",
            action="count",
            dest="verbosity",
            help="increase verbosity level",
        )
        self.parser.add_argument(
            "--keep-tmp",
            action="store_true",
            default=False,
        )
        self.parser.add_argument("paths", nargs="*", type=str)
        self.parser.set_defaults(handler=self.handle)

    def run(self, args: List[str]) -> None:
        """Parse *args*, adjust the root logger's verbosity, and dispatch."""
        parsed = self.parser.parse_args(args)

        if parsed.verbosity is not None:
            root = logging.getLogger("")
            root.propagate = True
            # The first -v lowers the root level by 10; every additional -v
            # (capped at nine extras) lowers it by a further 1.
            drop = 10 * min(parsed.verbosity, 1) + min(max(parsed.verbosity - 1, 0), 9)
            root.setLevel(root.getEffectiveLevel() - drop)

        logging.debug(
            "args = %s, parse_result = %s, logging.level = %s",
            args,
            parsed,
            logging.getLogger("").getEffectiveLevel(),
        )

        parsed.handler(parsed)

    def handle(self, parse_result: argparse.Namespace) -> None:
        """Fetch every resource whose local path falls under a selected path.

        With no positional paths, every resource in ``RESOURCES`` is fetched.
        """
        logging.debug("entry ...")

        selected = {Path(path).absolute() for path in parse_result.paths}
        logging.debug("paths = %s", selected)

        if parse_result.keep_tmp:
            # Deliberately leaked so the user can inspect it afterwards.
            tmp_path = Path(mkdtemp())
        else:
            # Keep a reference so the directory survives until we return.
            tmp_dir = TemporaryDirectory()
            tmp_path = Path(tmp_dir.name)

        def _under(base: Path, candidate: Path) -> bool:
            # True when candidate is base itself or lies beneath it.
            try:
                candidate.relative_to(base)
                return True
            except ValueError:
                return False

        for resource in RESOURCES:
            local = resource.local_path.absolute()
            if selected and not any(_under(base, local) for base in selected):
                logging.info("skipping %s", resource.local_path)
                continue
            resource.fetch(tmp_path)
|
||
|
||
def main() -> None:
    """Configure stderr logging and run the fetcher CLI on ``sys.argv``."""
    log_format = (
        "%(asctime)s.%(msecs)03d %(process)d %(thread)d %(levelno)03d:%(levelname)-8s "
        "%(name)-12s %(module)s:%(lineno)s:%(funcName)s %(message)s"
    )
    # PYLOGGING_LEVEL overrides the default INFO level from the environment.
    logging.basicConfig(
        level=os.environ.get("PYLOGGING_LEVEL", logging.INFO),
        stream=sys.stderr,
        datefmt="%Y-%m-%dT%H:%M:%S",
        format=log_format,
    )

    Application().run(sys.argv[1:])


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
All documents in this Repository are licensed by contributors | ||
under both the [W3C Test Suite License](http://www.w3.org/Consortium/Legal/2008/04-testsuite-license) and
[W3C Software and Document License](https://www.w3.org/Consortium/Legal/copyright-software). | ||
|
Oops, something went wrong.