Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add parsing of <_:identifier> style bnodes; switch to line by line parsing.
Browse files Browse the repository at this point in the history
recalcitrantsupplant committed Aug 2, 2024

Verified

This commit was signed with the committer’s verified signature.
mbklein Michael B. Klein
1 parent 3a59a07 commit 0e75969
Showing 3 changed files with 70 additions and 22 deletions.
75 changes: 53 additions & 22 deletions rdflib/plugins/parsers/patch.py
Original file line number Diff line number Diff line change
@@ -2,16 +2,19 @@

from codecs import getreader
from enum import Enum
from typing import Any, MutableMapping, Optional
from typing import TYPE_CHECKING, Any, MutableMapping, Optional, Union

from rdflib.exceptions import ParserError as ParseError
from rdflib.graph import Dataset
from rdflib.parser import InputSource
from rdflib.plugins.parsers.nquads import NQuadsParser

# Build up from the NTriples parser:
from rdflib.plugins.parsers.ntriples import r_wspace
from rdflib.term import BNode
from rdflib.plugins.parsers.ntriples import r_nodeid, r_tail, r_uriref, r_wspace
from rdflib.term import BNode, URIRef

if TYPE_CHECKING:
import typing_extensions as te

__all__ = ["RDFPatchParser", "Operation"]

@@ -88,14 +91,7 @@ def parse( # type: ignore[override]
self.parsepatch(bnode_context)
except ParseError as msg:
raise ParseError("Invalid line (%s):\n%r" % (msg, __line))

if self.skolemize:
return self.sink
else:
self.sink = self.sink.de_skolemize()
return self.sink # Dataset is skolemized as part of adding/removing triples
# , so de skolemize before returning. NB this is broken by the parent class
# (ConjunctiveGraph) which re-skolemizes the dataset.
return self.sink

def parsepatch(self, bnode_context: Optional[_BNodeContextType] = None) -> None:
self.eat(r_wspace)
@@ -108,23 +104,46 @@ def parsepatch(self, bnode_context: Optional[_BNodeContextType] = None) -> None:
operation = self.operation()
self.eat(r_wspace)

if operation == Operation.AddTripleOrQuad:
self.add_triple_or_quad()
elif operation == Operation.DeleteTripleOrQuad:
self.delete_triple_or_quad()
if operation in [Operation.AddTripleOrQuad, Operation.DeleteTripleOrQuad]:
self.add_or_remove_triple_or_quad(operation, bnode_context)
elif operation == Operation.AddPrefix:
self.add_prefix()
elif operation == Operation.DeletePrefix:
self.delete_prefix()

def add_triple_or_quad(self):
self.sink.parse(data=self.line, format="nquads", skolemize=True)
def add_or_remove_triple_or_quad(
self, operation, bnode_context: Optional[_BNodeContextType] = None
) -> None:
self.eat(r_wspace)
if (not self.line) or self.line.startswith("#"):
return # The line is empty or a comment

def delete_triple_or_quad(self):
removal_ds = Dataset()
removal_ds.parse(data=self.line, format="nquads", skolemize=True)
triple_or_quad = next(iter(removal_ds))
self.sink.remove(triple_or_quad)
subject = self.labeled_bnode() or self.subject(bnode_context)
self.eat(r_wspace)

predicate = self.predicate()
self.eat(r_wspace)

obj = self.labeled_bnode() or self.object(bnode_context)
self.eat(r_wspace)

context = self.labeled_bnode() or self.uriref() or self.nodeid(bnode_context)
self.eat(r_tail)

if self.line:
raise ParseError("Trailing garbage")
# Must have a context aware store - add on a normal Graph
# discards anything where the ctx != graph.identifier
if operation == Operation.AddTripleOrQuad:
if context:
self.sink.get_context(context).add((subject, predicate, obj))
else:
self.sink.default_context.add((subject, predicate, obj))
elif operation == Operation.DeleteTripleOrQuad:
if context:
self.sink.get_context(context).remove((subject, predicate, obj))
else:
self.sink.default_context.remove((subject, predicate, obj))

def add_prefix(self):
# Extract prefix and URI from the line
@@ -148,3 +167,15 @@ def operation(self) -> Operation:

def eat_op(self, op: str) -> None:
    """Remove the operation token ``op`` from the front of ``self.line``.

    Only an exact leading occurrence of ``op`` is consumed; when the line
    does not start with ``op`` it is left unchanged.

    :param op: the operation token text to strip from the current line.
    """
    # ``str.lstrip(op)`` treats ``op`` as a *set of characters* and keeps
    # stripping while the next character is in that set, so it could eat
    # into whatever follows the token (e.g. "AAB".lstrip("A") == "B").
    # Slice off exactly one occurrence of the prefix instead.
    if self.line.startswith(op):
        self.line = self.line[len(op) :]

def nodeid(
    self, bnode_context: Optional[_BNodeContextType] = None
) -> Union[te.Literal[False], BNode, URIRef]:
    """Parse a blank node written in ``_...`` form at the start of the line.

    :param bnode_context: not used by this implementation; the BNode is
        built directly from the matched text.
    :return: a ``BNode`` built from the first capture group of
        ``r_nodeid``, or ``False`` when the line does not begin with ``_``.
    """
    if self.peek("_"):
        return BNode(self.eat(r_nodeid).group(1))
    # Return False explicitly: the annotation promises ``Literal[False]``,
    # whereas falling off the end of the function would yield ``None``.
    return False

def labeled_bnode(self):
    """Parse a blank node written in ``<_:label>`` form, if one is next.

    :return: a ``BNode`` built from the first capture group of
        ``r_nodeid`` applied to the text inside the angle brackets, or
        ``None`` when the line does not start with ``<_``.
    :raises ParseError: when the bracketed text starts with ``_`` but does
        not match ``r_nodeid``.
    """
    if not self.peek("<_"):
        return None
    plain_uri = self.eat(r_uriref).group(1)
    node_match = r_nodeid.match(plain_uri)
    if node_match is None:
        # Without this guard a non-matching term would surface as an
        # AttributeError on ``None`` rather than as a parse error.
        raise ParseError("Invalid labeled blank node: %r" % plain_uri)
    return BNode(node_match.group(1))
6 changes: 6 additions & 0 deletions test/data/patch/add_and_delete_labeled_bnode_quads.rdp
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
TX .
A <_:bn1> <http://example.org/predicate1> "object1" <http://example.org/graph1> .
A <_:bn1> <http://example.org/predicate2> "object2" <http://example.org/graph1> .
A <_:bn1> <http://example.org/predicate3> "object3" <http://example.org/graph1> .
D <_:bn1> <http://example.org/predicate2> "object2" <http://example.org/graph1> .
TC .
11 changes: 11 additions & 0 deletions test/test_parsers/test_parser_patch.py
Original file line number Diff line number Diff line change
@@ -137,3 +137,14 @@ def test_10(self):
with open(delete_bnode_uri_path, "rb") as data:
ds.parse(data, format="patch")
assert len(ds) == 0

def test_11(self):
    """Parse the labeled-bnode add/delete patch file into a fresh Dataset.

    The patch file is located relative to the current directory, parsed
    with the ``patch`` format, and the dataset is expected to have a
    length of 2 afterwards. The populated dataset is returned.
    """
    patch_file = os.path.join(
        TEST_DATA_DIR, "patch/add_and_delete_labeled_bnode_quads.rdp"
    )
    relative_patch_file = os.path.relpath(patch_file, os.curdir)

    dataset = Dataset()
    with open(relative_patch_file, "rb") as patch_stream:
        dataset.parse(patch_stream, format="patch")

    assert len(dataset) == 2
    return dataset

0 comments on commit 0e75969

Please sign in to comment.