Skip to content

Commit

Permalink
Fix SPARQL update parsing to handle arbitrary amounts of triples in i…
Browse files Browse the repository at this point in the history
…nserts

pyparsing's handling of recursive rules is limited by Python's recursion
limit, and the rule that handles multiple triples in data inserts is
recursive.

As a consequence, the number of triples that can be inserted is limited
by Python's recursion limit.

To address this, the rule has been rewritten to be non-recursive.

@rchateauneu thanks for the help with rule rewriting.
  • Loading branch information
aucampia committed Jun 20, 2021
1 parent a32f48b commit 5df0a36
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 4 deletions.
13 changes: 9 additions & 4 deletions rdflib/plugins/sparql/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -584,10 +584,15 @@ def _hexExpand(match):
TriplesSameSubject = VarOrTerm + PropertyListNotEmpty | TriplesNode + PropertyList
TriplesSameSubject.setParseAction(expandTriples)

# [52] TriplesTemplate ::= TriplesSameSubject ( '.' Optional(TriplesTemplate) )?
TriplesTemplate = Forward()
TriplesTemplate <<= ParamList("triples", TriplesSameSubject) + Optional(
Suppress(".") + Optional(TriplesTemplate)
# [52] TriplesTemplate ::= TriplesSameSubject ( '.' TriplesTemplate? )?
# NOTE: pyparsing's handling of recursive rules is limited by Python's recursion
# limit.
# (https://docs.python.org/3/library/sys.html#sys.setrecursionlimit)
# To accommodate arbitrary numbers of triples this rule is rewritten to not be
# recursive:
# [52*] TriplesTemplate ::= TriplesSameSubject ( '.' TriplesSameSubject? )*
# NOTE(review): as written, [52*] also matches several consecutive '.'
# separators (the optional TriplesSameSubject may be omitted on every
# repetition), which [52] does not -- confirm this leniency is acceptable.
TriplesTemplate = ParamList("triples", TriplesSameSubject) + ZeroOrMore(
    Suppress(".") + Optional(ParamList("triples", TriplesSameSubject))
)

# [51] QuadsNotTriples ::= 'GRAPH' VarOrIri '{' Optional(TriplesTemplate) '}'
Expand Down
53 changes: 53 additions & 0 deletions test/test_sparql_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
from rdflib import Graph, Literal
from rdflib.term import Node
from rdflib.namespace import Namespace
from rdflib.plugins.sparql.processor import processUpdate
import unittest
import sys
import math
from typing import Set, Tuple


def triple_set(graph: Graph) -> Set[Tuple[Node, Node, Node]]:
    """Return the triples yielded by ``graph.triples`` as a set.

    The graph is queried with an all-``None`` pattern, and the resulting
    triples are collected into a set so two graphs can be compared by
    content.
    """
    match_everything = (None, None, None)
    return set(graph.triples(match_everything))


class SPARQLParserTests(unittest.TestCase):
    """Regression tests for parsing SPARQL ``INSERT DATA`` updates with many triples."""

    def test_insert_recursionlimit(self) -> None:
        """Insert a resource count derived from the interpreter's recursion limit."""
        # These values are experimentally determined
        # to cause the RecursionError reported in
        # https://github.com/RDFLib/rdflib/issues/1336
        resource_count = math.ceil(sys.getrecursionlimit() / (33 - 3))
        self.do_insert(resource_count)

    def test_insert_large(self) -> None:
        """Insert a fixed batch of 200 resources."""
        self.do_insert(200)

    def do_insert(self, resource_count: int) -> None:
        """Round-trip ``resource_count`` resources through an ``INSERT DATA`` update.

        A source graph is filled with three triples per resource, serialized
        as N-Triples, and inserted into an empty graph via ``processUpdate``.
        The test passes when both graphs end up with the same set of triples.
        Any exception raised by ``processUpdate`` propagates and fails the test.
        """
        EGV = Namespace("http://example.org/vocab#")
        EGI = Namespace("http://example.org/instance#")
        prop0, prop1, prop2 = EGV["prop0"], EGV["prop1"], EGV["prop2"]
        g0 = Graph()
        for index in range(resource_count):
            resource = EGI[f"resource{index}"]
            g0.add((resource, prop0, Literal(index)))
            g0.add((resource, prop1, Literal("example resource")))
            g0.add((resource, prop2, Literal(f"resource #{index}")))

        g0ntriples = g0.serialize(format="ntriples")
        g1 = Graph()

        # Sanity check: the target graph must differ from the source before
        # the update, otherwise the final equality assertion proves nothing.
        self.assertNotEqual(triple_set(g0), triple_set(g1))

        processUpdate(g1, f"INSERT DATA {{ {g0ntriples!s} }}")

        self.assertEqual(triple_set(g0), triple_set(g1))


if __name__ == "__main__":
    # Run the test cases when this module is executed directly.
    unittest.main()

0 comments on commit 5df0a36

Please sign in to comment.