Make Result.serialize work more like Graph.serialize

This patch makes the following changes to `Result.serialize`. * Return str by default instead of bytes. * Use "txt" as the default tabular serialization format. * Use "turtle" as the default graph serialization format. * Support both typing.IO[bytes] and typing.TextIO destinations. Corresponding changes are also made to the specific serializers. This patch also changes how text is written to typing.IO[bytes] in serializers to ensure that the buffer is flushed and detached from the TextIOWrapper once the serialization function completes, so it can be used normally afterwards. This patch further includes a bunch of additional type hints.
RDFLib · Jan 2, 2022 · 8729ed9 · 8729ed9
1 parent 43d8622
commit 8729ed9
Show file tree

Hide file tree

Showing 23 changed files with 1,323 additions and 216 deletions.
diff --git a/rdflib/graph.py b/rdflib/graph.py
@@ -348,29 +348,25 @@ def __init__(
         self.formula_aware = False
         self.default_union = False
 
-    def __get_store(self):
+    @property
+    def store(self) -> Store:  # read-only attr
         return self.__store
 
-    store = property(__get_store)  # read-only attr
-
-    def __get_identifier(self):
+    @property
+    def identifier(self) -> Node:  # read-only attr
         return self.__identifier
 
-    identifier = property(__get_identifier)  # read-only attr
-
-    def _get_namespace_manager(self):
+    @property
+    def namespace_manager(self) -> NamespaceManager:
+        """this graph's namespace-manager"""
         if self.__namespace_manager is None:
             self.__namespace_manager = NamespaceManager(self)
         return self.__namespace_manager
 
-    def _set_namespace_manager(self, nm):
-        self.__namespace_manager = nm
-
-    namespace_manager = property(
-        _get_namespace_manager,
-        _set_namespace_manager,
-        doc="this graph's namespace-manager",
-    )
+    @namespace_manager.setter
+    def namespace_manager(self, value: NamespaceManager):
+        """this graph's namespace-manager"""
+        self.__namespace_manager = value
 
     def __repr__(self):
         return "<Graph identifier=%s (%s)>" % (self.identifier, type(self))
@@ -993,7 +989,12 @@ def absolutize(self, uri, defrag=1):
     # no destination and non-None positional encoding
     @overload
     def serialize(
-        self, destination: None, format: str, base: Optional[str], encoding: str, **args
+        self,
+        destination: None,
+        format: str,
+        base: Optional[str],
+        encoding: str,
+        **args,
     ) -> bytes:
         ...
 
@@ -1054,18 +1055,37 @@ def serialize(
         encoding: Optional[str] = None,
         **args: Any,
     ) -> Union[bytes, str, "Graph"]:
-        """Serialize the Graph to destination
-
-        If destination is None serialize method returns the serialization as
-        bytes or string.
-
-        If encoding is None and destination is None, returns a string
-        If encoding is set, and Destination is None, returns bytes
-
-        Format defaults to turtle.
-
-        Format support can be extended with plugins,
-        but "xml", "n3", "turtle", "nt", "pretty-xml", "trix", "trig" and "nquads" are built in.
+        """
+        Serialize the graph.
+
+        :param destination:
+           The destination to serialize the graph to. This can be a path as a
+           :class:`str` or :class:`~pathlib.PurePath` object, or it can be a
+           :class:`~typing.IO[bytes]` like object. If this parameter is not
+           supplied the serialized graph will be returned.
+        :type destination: Optional[Union[str, typing.IO[bytes], pathlib.PurePath]]
+        :param format:
+           The format that the output should be written in. This value
+           references a :class:`~rdflib.serializer.Serializer` plugin. Format
+           support can be extended with plugins, but `"xml"`, `"n3"`,
+           `"turtle"`, `"nt"`, `"pretty-xml"`, `"trix"`, `"trig"`, `"nquads"`
+           and `"json-ld"` are built in. Defaults to `"turtle"`.
+        :type format: str
+        :param base:
+           The base IRI for formats that support it. For the turtle format this
+           will be used as the `@base` directive.
+        :type base: Optional[str]
+        :param encoding: Encoding of output.
+        :type encoding: Optional[str]
+        :param **args:
+           Additional arguments to pass to the
+           :class:`~rdflib.serializer.Serializer` that will be used.
+        :type **args: Any
+        :return: The serialized graph if `destination` is `None`.
+        :rtype: :class:`bytes` if `destination` is `None` and `encoding` is not `None`.
+        :rtype: :class:`str` if `destination` is `None` and `encoding` is `None`.
+        :return: `self` (i.e. the :class:`~rdflib.graph.Graph` instance) if `destination` is not None.
+        :rtype: :class:`~rdflib.graph.Graph` if `destination` is not None.
         """
 
         # if base is not given as attribute use the base set for the graph
@@ -1254,7 +1274,7 @@ def query(
         if none are given, the namespaces from the graph's namespace manager
         are used.
 
-        :returntype: rdflib.query.Result
+        :returntype: :class:`~rdflib.query.Result`
 
         """
 

diff --git a/rdflib/namespace/__init__.py b/rdflib/namespace/__init__.py
@@ -1,6 +1,6 @@
 import logging
 import warnings
-from typing import TYPE_CHECKING, List, Union, Iterable
+from typing import TYPE_CHECKING, List, Tuple, Union, Iterable
 from unicodedata import category
 
 from pathlib import Path
@@ -587,7 +587,7 @@ def bind(self, prefix, namespace, override=True, replace=False) -> None:
                     self.store.bind(prefix, namespace)
         insert_trie(self.__trie, str(namespace))
 
-    def namespaces(self):
+    def namespaces(self) -> Iterable[Tuple[str, URIRef]]:
         for prefix, namespace in self.store.namespaces():
             namespace = URIRef(namespace)
             yield prefix, namespace

diff --git a/rdflib/parser.py b/rdflib/parser.py
@@ -45,7 +45,7 @@ class Parser(object):
     def __init__(self):
         pass
 
-    def parse(self, source, sink):
+    def parse(self, source, sink, **args):
         pass
 
 

diff --git a/rdflib/plugins/serializers/n3.py b/rdflib/plugins/serializers/n3.py
@@ -109,7 +109,7 @@ def p_clause(self, node, position):
             self.write("{")
             self.depth += 1
             serializer = N3Serializer(node, parent=self)
-            serializer.serialize(self.stream)
+            serializer.serialize(self.stream.buffer)
             self.depth -= 1
             self.write(self.indent() + "}")
             return True

diff --git a/rdflib/plugins/serializers/nt.py b/rdflib/plugins/serializers/nt.py
@@ -12,6 +12,8 @@
 import warnings
 import codecs
 
+from rdflib.util import as_textio
+
 __all__ = ["NTSerializer"]
 
 
@@ -38,9 +40,15 @@ def serialize(
                 f"Given encoding was: {encoding}"
             )
 
-        for triple in self.store:
-            stream.write(_nt_row(triple).encode())
-        stream.write("\n".encode())
+        with as_textio(
+            stream,
+            encoding=encoding, # TODO: CHECK: self.encoding set removed, why?
+            errors="_rdflib_nt_escape",
+            write_through=True,
+        ) as text_stream:
+            for triple in self.store:
+                text_stream.write(_nt_row(triple))
+            text_stream.write("\n")
 
 
 class NT11Serializer(NTSerializer):

diff --git a/rdflib/plugins/serializers/rdfxml.py b/rdflib/plugins/serializers/rdfxml.py
@@ -1,4 +1,4 @@
-from typing import IO, Dict, Optional, Set
+from typing import IO, Dict, Optional, Set, cast
 from rdflib.plugins.serializers.xmlwriter import XMLWriter
 
 from rdflib.namespace import Namespace, RDF, RDFS  # , split_uri
@@ -173,6 +173,8 @@ def serialize(
         encoding: Optional[str] = None,
         **args,
     ):
+        # TODO FIXME: this should be Optional, but it's not because nothing
+        # treats it as such.
         self.__serialized: Dict[Identifier, int] = {}
         store = self.store
         # if base is given here, use that, if not and a base is set for the graph use that
@@ -239,6 +241,7 @@ def subject(self, subject: Identifier, depth: int = 1):
         writer = self.writer
 
         if subject in self.forceRDFAbout:
+            subject = cast(URIRef, subject)
             writer.push(RDFVOC.Description)
             writer.attribute(RDFVOC.about, self.relativize(subject))
             writer.pop(RDFVOC.Description)
@@ -280,6 +283,7 @@ def subj_as_obj_more_than(ceil):
 
         elif subject in self.forceRDFAbout:
             # TODO FIXME?: this looks like a duplicate of first condition
+            subject = cast(URIRef, subject)
             writer.push(RDFVOC.Description)
             writer.attribute(RDFVOC.about, self.relativize(subject))
             writer.pop(RDFVOC.Description)

diff --git a/rdflib/plugins/serializers/trig.py b/rdflib/plugins/serializers/trig.py
@@ -62,52 +62,45 @@ def serialize(
         spacious: Optional[bool] = None,
         **args,
     ):
-        self.reset()
-        self.stream = stream
-        # if base is given here, use that, if not and a base is set for the graph use that
-        if base is not None:
-            self.base = base
-        elif self.store.base is not None:
-            self.base = self.store.base
-
-        if spacious is not None:
-            self._spacious = spacious
-
-        self.preprocess()
-
-        self.startDocument()
-
-        firstTime = True
-        for store, (ordered_subjects, subjects, ref) in self._contexts.items():
-            if not ordered_subjects:
-                continue
-
-            self._references = ref
-            self._serialized = {}
-            self.store = store
-            self._subjects = subjects
-
-            if self.default_context and store.identifier == self.default_context:
-                self.write(self.indent() + "\n{")
-            else:
-                if isinstance(store.identifier, BNode):
-                    iri = store.identifier.n3()
-                else:
-                    iri = self.getQName(store.identifier)
-                    if iri is None:
-                        iri = store.identifier.n3()
-                self.write(self.indent() + "\n%s {" % iri)
+        self._serialize_init(stream, base, encoding, spacious)
+        try:
+            self.preprocess()
 
-            self.depth += 1
-            for subject in ordered_subjects:
-                if self.isDone(subject):
+            self.startDocument()
+
+            firstTime = True
+            for store, (ordered_subjects, subjects, ref) in self._contexts.items():
+                if not ordered_subjects:
                     continue
-                if firstTime:
-                    firstTime = False
-                if self.statement(subject) and not firstTime:
-                    self.write("\n")
-            self.depth -= 1
-            self.write("}\n")
-
-        self.endDocument()
-        stream.write("\n".encode("latin-1"))
+
+                self._references = ref
+                self._serialized = {}
+                self.store = store
+                self._subjects = subjects
+
+                if self.default_context and store.identifier == self.default_context:
+                    self.write(self.indent() + "\n{")
+                else:
+                    if isinstance(store.identifier, BNode):
+                        iri = store.identifier.n3()
+                    else:
+                        iri = self.getQName(store.identifier)
+                        if iri is None:
+                            iri = store.identifier.n3()
+                    self.write(self.indent() + "\n%s {" % iri)
+
+                self.depth += 1
+                for subject in ordered_subjects:
+                    if self.isDone(subject):
+                        continue
+                    if firstTime:
+                        firstTime = False
+                    if self.statement(subject) and not firstTime:
+                        self.write("\n")
+                self.depth -= 1
+                self.write("}\n")
+
+            self.endDocument()
+            self.write("\n")
+        finally:
+            self._serialize_end()
-Original file line number
+Diff line change
@@ Expand Up / @@ -45,7 +45,7 @@ class Parser(object): @@
         def __init__(self):
             pass
-        def parse(self, source, sink):
+        def parse(self, source, sink, **args):
             pass
@@ Expand Down @@