Skip to content

Commit

Permalink
Make Result.serialize work more like Graph.serialize
Browse files Browse the repository at this point in the history
This patch makes the following changes to `Result.serialize`.

* Return str by default instead of bytes.
* Use "txt" as the default tabular serialization format.
* Use "turtle" as the default graph serialization format.
* Support both typing.IO[bytes] and typing.TextIO destinations.

Corresponding changes are made to the specific serializers also.

This patch also changes how text is written to typing.IO[bytes] in
serializers to ensure that the buffer is flushed and
detached from the TextIOWrapper once the serialization function
completes so it can be used normally afterwards.

This patch further includes a bunch of additional type hints.
  • Loading branch information
aucampia committed Jan 2, 2022
1 parent 43d8622 commit 8729ed9
Show file tree
Hide file tree
Showing 23 changed files with 1,323 additions and 216 deletions.
78 changes: 49 additions & 29 deletions rdflib/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,29 +348,25 @@ def __init__(
self.formula_aware = False
self.default_union = False

def __get_store(self):
@property
def store(self) -> Store: # read-only attr
return self.__store

store = property(__get_store) # read-only attr

def __get_identifier(self):
@property
def identifier(self) -> Node: # read-only attr
return self.__identifier

identifier = property(__get_identifier) # read-only attr

def _get_namespace_manager(self):
@property
def namespace_manager(self) -> NamespaceManager:
"""this graph's namespace-manager"""
if self.__namespace_manager is None:
self.__namespace_manager = NamespaceManager(self)
return self.__namespace_manager

def _set_namespace_manager(self, nm):
self.__namespace_manager = nm

namespace_manager = property(
_get_namespace_manager,
_set_namespace_manager,
doc="this graph's namespace-manager",
)
@namespace_manager.setter
def namespace_manager(self, value: NamespaceManager):
"""this graph's namespace-manager"""
self.__namespace_manager = value

def __repr__(self):
return "<Graph identifier=%s (%s)>" % (self.identifier, type(self))
Expand Down Expand Up @@ -993,7 +989,12 @@ def absolutize(self, uri, defrag=1):
# no destination and non-None positional encoding
@overload
def serialize(
self, destination: None, format: str, base: Optional[str], encoding: str, **args
self,
destination: None,
format: str,
base: Optional[str],
encoding: str,
**args,
) -> bytes:
...

Expand Down Expand Up @@ -1054,18 +1055,37 @@ def serialize(
encoding: Optional[str] = None,
**args: Any,
) -> Union[bytes, str, "Graph"]:
"""Serialize the Graph to destination
If destination is None serialize method returns the serialization as
bytes or string.
If encoding is None and destination is None, returns a string
If encoding is set, and Destination is None, returns bytes
Format defaults to turtle.
Format support can be extended with plugins,
but "xml", "n3", "turtle", "nt", "pretty-xml", "trix", "trig" and "nquads" are built in.
"""
Serialize the graph.
:param destination:
The destination to serialize the graph to. This can be a path as a
:class:`str` or :class:`~pathlib.PurePath` object, or it can be a
:class:`~typing.IO[bytes]` like object. If this parameter is not
supplied the serialized graph will be returned.
:type destination: Optional[Union[str, typing.IO[bytes], pathlib.PurePath]]
:param format:
The format that the output should be written in. This value
references a :class:`~rdflib.serializer.Serializer` plugin. Format
support can be extended with plugins, but `"xml"`, `"n3"`,
`"turtle"`, `"nt"`, `"pretty-xml"`, `"trix"`, `"trig"`, `"nquads"`
and `"json-ld"` are built in. Defaults to `"turtle"`.
:type format: str
:param base:
The base IRI for formats that support it. For the turtle format this
will be used as the `@base` directive.
:type base: Optional[str]
:param encoding: Encoding of output.
:type encoding: Optional[str]
:param **args:
Additional arguments to pass to the
:class:`~rdflib.serializer.Serializer` that will be used.
:type **args: Any
:return: The serialized graph if `destination` is `None`.
:rtype: :class:`bytes` if `destination` is `None` and `encoding` is not `None`.
:rtype: :class:`str` if `destination` is `None` and `encoding` is `None`.
:return: `self` (i.e. the :class:`~rdflib.graph.Graph` instance) if `destination` is not None.
:rtype: :class:`~rdflib.graph.Graph` if `destination` is not None.
"""

# if base is not given as attribute use the base set for the graph
Expand Down Expand Up @@ -1254,7 +1274,7 @@ def query(
if none are given, the namespaces from the graph's namespace manager
are used.
:returntype: rdflib.query.Result
:returntype: :class:`~rdflib.query.Result`
"""

Expand Down
4 changes: 2 additions & 2 deletions rdflib/namespace/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import logging
import warnings
from typing import TYPE_CHECKING, List, Union, Iterable
from typing import TYPE_CHECKING, List, Tuple, Union, Iterable
from unicodedata import category

from pathlib import Path
Expand Down Expand Up @@ -587,7 +587,7 @@ def bind(self, prefix, namespace, override=True, replace=False) -> None:
self.store.bind(prefix, namespace)
insert_trie(self.__trie, str(namespace))

def namespaces(self):
def namespaces(self) -> Iterable[Tuple[str, URIRef]]:
for prefix, namespace in self.store.namespaces():
namespace = URIRef(namespace)
yield prefix, namespace
Expand Down
2 changes: 1 addition & 1 deletion rdflib/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ class Parser(object):
def __init__(self):
pass

def parse(self, source, sink):
def parse(self, source, sink, **args):
pass


Expand Down
2 changes: 1 addition & 1 deletion rdflib/plugins/serializers/n3.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def p_clause(self, node, position):
self.write("{")
self.depth += 1
serializer = N3Serializer(node, parent=self)
serializer.serialize(self.stream)
serializer.serialize(self.stream.buffer)
self.depth -= 1
self.write(self.indent() + "}")
return True
Expand Down
14 changes: 11 additions & 3 deletions rdflib/plugins/serializers/nt.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
import warnings
import codecs

from rdflib.util import as_textio

__all__ = ["NTSerializer"]


Expand All @@ -38,9 +40,15 @@ def serialize(
f"Given encoding was: {encoding}"
)

for triple in self.store:
stream.write(_nt_row(triple).encode())
stream.write("\n".encode())
with as_textio(
stream,
encoding=encoding, # TODO: CHECK: self.encoding set removed, why?
errors="_rdflib_nt_escape",
write_through=True,
) as text_stream:
for triple in self.store:
text_stream.write(_nt_row(triple))
text_stream.write("\n")


class NT11Serializer(NTSerializer):
Expand Down
6 changes: 5 additions & 1 deletion rdflib/plugins/serializers/rdfxml.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import IO, Dict, Optional, Set
from typing import IO, Dict, Optional, Set, cast
from rdflib.plugins.serializers.xmlwriter import XMLWriter

from rdflib.namespace import Namespace, RDF, RDFS # , split_uri
Expand Down Expand Up @@ -173,6 +173,8 @@ def serialize(
encoding: Optional[str] = None,
**args,
):
# TODO FIXME: this should be Optional, but it's not because nothing
# treats it as such.
self.__serialized: Dict[Identifier, int] = {}
store = self.store
# if base is given here, use that, if not and a base is set for the graph use that
Expand Down Expand Up @@ -239,6 +241,7 @@ def subject(self, subject: Identifier, depth: int = 1):
writer = self.writer

if subject in self.forceRDFAbout:
subject = cast(URIRef, subject)
writer.push(RDFVOC.Description)
writer.attribute(RDFVOC.about, self.relativize(subject))
writer.pop(RDFVOC.Description)
Expand Down Expand Up @@ -280,6 +283,7 @@ def subj_as_obj_more_than(ceil):

elif subject in self.forceRDFAbout:
# TODO FIXME?: this looks like a duplicate of first condition
subject = cast(URIRef, subject)
writer.push(RDFVOC.Description)
writer.attribute(RDFVOC.about, self.relativize(subject))
writer.pop(RDFVOC.Description)
Expand Down
87 changes: 40 additions & 47 deletions rdflib/plugins/serializers/trig.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,52 +62,45 @@ def serialize(
spacious: Optional[bool] = None,
**args,
):
self.reset()
self.stream = stream
# if base is given here, use that, if not and a base is set for the graph use that
if base is not None:
self.base = base
elif self.store.base is not None:
self.base = self.store.base

if spacious is not None:
self._spacious = spacious

self.preprocess()

self.startDocument()

firstTime = True
for store, (ordered_subjects, subjects, ref) in self._contexts.items():
if not ordered_subjects:
continue

self._references = ref
self._serialized = {}
self.store = store
self._subjects = subjects

if self.default_context and store.identifier == self.default_context:
self.write(self.indent() + "\n{")
else:
if isinstance(store.identifier, BNode):
iri = store.identifier.n3()
else:
iri = self.getQName(store.identifier)
if iri is None:
iri = store.identifier.n3()
self.write(self.indent() + "\n%s {" % iri)
self._serialize_init(stream, base, encoding, spacious)
try:
self.preprocess()

self.depth += 1
for subject in ordered_subjects:
if self.isDone(subject):
self.startDocument()

firstTime = True
for store, (ordered_subjects, subjects, ref) in self._contexts.items():
if not ordered_subjects:
continue
if firstTime:
firstTime = False
if self.statement(subject) and not firstTime:
self.write("\n")
self.depth -= 1
self.write("}\n")

self.endDocument()
stream.write("\n".encode("latin-1"))

self._references = ref
self._serialized = {}
self.store = store
self._subjects = subjects

if self.default_context and store.identifier == self.default_context:
self.write(self.indent() + "\n{")
else:
if isinstance(store.identifier, BNode):
iri = store.identifier.n3()
else:
iri = self.getQName(store.identifier)
if iri is None:
iri = store.identifier.n3()
self.write(self.indent() + "\n%s {" % iri)

self.depth += 1
for subject in ordered_subjects:
if self.isDone(subject):
continue
if firstTime:
firstTime = False
if self.statement(subject) and not firstTime:
self.write("\n")
self.depth -= 1
self.write("}\n")

self.endDocument()
self.write("\n")
finally:
self._serialize_end()
Loading

0 comments on commit 8729ed9

Please sign in to comment.