Skip to content

Commit

Permalink
fix explicit dataset: FROM and FROM NAMED clause
Browse files Browse the repository at this point in the history
When a query uses a FROM or FROM NAMED clause, the query's RDF dataset is redefined entirely:
- Only the graphs listed in FROM clauses are included in the query's default graph.
- Only the graphs listed in FROM NAMED clauses are included in the query's named graphs.
- External graphs are loaded only if they do not already exist in the given ConjunctiveGraph.
  • Loading branch information
apicouSP committed May 28, 2024
1 parent 53f72d8 commit d6858e0
Show file tree
Hide file tree
Showing 4 changed files with 169 additions and 30 deletions.
28 changes: 2 additions & 26 deletions rdflib/plugins/sparql/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -661,36 +661,12 @@ def evalQuery(
:doc:`Security Considerations </security_considerations>`
documentation.
"""
main = query.algebra

initBindings = dict((Variable(k), v) for k, v in (initBindings or {}).items())

ctx = QueryContext(graph, initBindings=initBindings)
ctx = QueryContext(graph, initBindings=initBindings, datasetClause=main.datasetClause)

ctx.prologue = query.prologue
main = query.algebra

if main.datasetClause:
if ctx.dataset is None:
raise Exception(
"Non-conjunctive-graph doesn't know about "
+ "graphs! Try a query without FROM (NAMED)."
)

ctx = ctx.clone() # or push/pop?

firstDefault = False
for d in main.datasetClause:
if d.default:
if firstDefault:
# replace current default graph
dg = ctx.dataset.get_context(BNode())
ctx = ctx.pushGraph(dg)
firstDefault = True

ctx.load(d.default, default=True)

elif d.named:
g = d.named
ctx.load(g, default=False)

return evalPart(ctx, main)
23 changes: 19 additions & 4 deletions rdflib/plugins/sparql/sparql.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,7 @@ def __init__(
graph: Optional[Graph] = None,
bindings: Optional[Union[Bindings, FrozenBindings, List[Any]]] = None,
initBindings: Optional[Mapping[str, Identifier]] = None,
datasetClause=None
):
self.initBindings = initBindings
self.bindings = Bindings(d=bindings or [])
Expand All @@ -264,11 +265,25 @@ def __init__(
self.graph: Optional[Graph]
self._dataset: Optional[ConjunctiveGraph]
if isinstance(graph, ConjunctiveGraph):
self._dataset = graph
if rdflib.plugins.sparql.SPARQL_DEFAULT_GRAPH_UNION:
self.graph = self.dataset
if datasetClause:
self._dataset = ConjunctiveGraph()
self.graph = Graph()
for d in datasetClause:
if d.default:
self.graph += graph.get_context(d.default)
if not graph.get_context(d.default):
self.load(d.default, default=True)
elif d.named:
namedGraphs = Graph(store=self.dataset.store, identifier=d.named)
namedGraphs += graph.get_context(d.named)
if not graph.get_context(d.named):
self.load(d.named, default=False)
else:
self.graph = self.dataset.default_context
self._dataset = graph
if rdflib.plugins.sparql.SPARQL_DEFAULT_GRAPH_UNION:
self.graph = self.dataset
else:
self.graph = self.dataset.default_context
else:
self._dataset = None
self.graph = graph
Expand Down
57 changes: 57 additions & 0 deletions test/test_sparql/test_dataset_exclusive.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
from rdflib.graph import ConjunctiveGraph
from rdflib.term import URIRef
import pytest
from typing import Generator
from test.utils.sparql_checker import ctx_configure_rdflib

# Shared test data: one triple in the default graph, plus one triple in each
# of the named graphs urn:g1, urn:g2 and urn:g3.
graph = ConjunctiveGraph()
graph.add((URIRef("urn:s0"), URIRef("urn:p0"), URIRef("urn:o0")))
for _n in ("1", "2", "3"):
    graph.add(
        (
            URIRef("urn:s" + _n),
            URIRef("urn:p" + _n),
            URIRef("urn:o" + _n),
            URIRef("urn:g" + _n),
        )
    )
del _n  # keep the module namespace free of the loop variable


# Run every test of this module with SPARQL_DEFAULT_GRAPH_UNION set to False,
# which makes the dataset exclusive; the previous value is expected to be
# restored by ctx_configure_rdflib() once the module's tests have finished.
@pytest.fixture(scope="module", autouse=True)
def configure_rdflib() -> Generator[None, None, None]:
    rdflib_config = ctx_configure_rdflib()
    with rdflib_config:
        # Hand control to the tests while the configuration is active.
        yield


# Implicit exclusive dataset: without any dataset clause, only the triple
# stored in the default graph is expected to match.
def test_exclusive():
    expected = [(URIRef("urn:s0"), URIRef("urn:p0"), URIRef("urn:o0"))]
    rows = list(graph.query("SELECT ?s ?p ?o WHERE {?s ?p ?o}"))
    assert rows == expected


# Explicit default graph with an exclusive dataset: FROM <urn:g1> is expected
# to expose exactly the triple stored in urn:g1.
def test_from():
    sparql = "SELECT ?s ?p ?o FROM <urn:g1> WHERE {?s ?p ?o}"
    expected = [(URIRef("urn:s1"), URIRef("urn:p1"), URIRef("urn:o1"))]
    rows = list(graph.query(sparql))
    assert rows == expected


# Explicit named graphs with an exclusive dataset: FROM NAMED <urn:g1> is
# expected to make urn:g1 the only graph matched by a GRAPH pattern.
def test_from_named():
    sparql = (
        "SELECT ?g ?s ?p ?o FROM NAMED <urn:g1> "
        "WHERE {graph ?g {?s ?p ?o}}"
    )
    expected = [
        (
            URIRef("urn:g1"),
            URIRef("urn:s1"),
            URIRef("urn:p1"),
            URIRef("urn:o1"),
        )
    ]
    rows = list(graph.query(sparql))
    assert rows == expected


# FROM and FROM NAMED can be combined in one query: urn:g1 feeds the default
# graph (no ?g binding) while urn:g2 is reachable through the GRAPH pattern.
def test_from_and_from_named():
    default_graph_row = (
        None,
        URIRef("urn:s1"),
        URIRef("urn:p1"),
        URIRef("urn:o1"),
    )
    named_graph_row = (
        URIRef("urn:g2"),
        URIRef("urn:s2"),
        URIRef("urn:p2"),
        URIRef("urn:o2"),
    )
    sparql = """
SELECT ?g ?s ?p ?o
FROM <urn:g1>
FROM NAMED <urn:g2>
WHERE {
{?s ?p ?o}
UNION {graph ?g {?s ?p ?o}}
} ORDER BY ?s
"""
    rows = list(graph.query(sparql))
    assert rows == [default_graph_row, named_graph_row]
91 changes: 91 additions & 0 deletions test/test_sparql/test_dataset_inclusive.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
from rdflib.graph import ConjunctiveGraph
from rdflib.term import URIRef

# Shared test data: one triple in the default graph, plus one triple in each
# of the named graphs urn:g1, urn:g2 and urn:g3.
graph = ConjunctiveGraph()
graph.add((URIRef("urn:s0"), URIRef("urn:p0"), URIRef("urn:o0")))
for _n in ("1", "2", "3"):
    graph.add(
        (
            URIRef("urn:s" + _n),
            URIRef("urn:p" + _n),
            URIRef("urn:o" + _n),
            URIRef("urn:g" + _n),
        )
    )
del _n  # keep the module namespace free of the loop variable


# Implicit inclusive dataset: the default graph is expected to be the merge of
# every graph, i.e. the default graph plus all the named graphs.
def test_inclusive():
    expected = [
        (URIRef(f"urn:s{i}"), URIRef(f"urn:p{i}"), URIRef(f"urn:o{i}"))
        for i in range(4)
    ]
    rows = list(graph.query("SELECT ?s ?p ?o WHERE {?s ?p ?o} ORDER BY ?s"))
    assert rows == expected


# Explicit default graph with an inclusive dataset: FROM <urn:g1> is expected
# to restrict the default graph to the triple stored in urn:g1.
def test_default_from_1():
    sparql = "SELECT ?s ?p ?o FROM <urn:g1> WHERE {?s ?p ?o}"
    expected = [(URIRef("urn:s1"), URIRef("urn:p1"), URIRef("urn:o1"))]
    rows = list(graph.query(sparql))
    assert rows == expected


# Several FROM clauses are expected to be merged into the default graph.
def test_default_from_2():
    sparql = (
        "SELECT ?s ?p ?o FROM <urn:g1> FROM <urn:g2> "
        "WHERE {?s ?p ?o} ORDER BY ?s"
    )
    expected = [
        (URIRef(f"urn:s{i}"), URIRef(f"urn:p{i}"), URIRef(f"urn:o{i}"))
        for i in (1, 2)
    ]
    rows = list(graph.query(sparql))
    assert rows == expected


# A FROM clause makes the RDF dataset explicit, so without any FROM NAMED
# clause the set of named graphs is considered empty and a GRAPH pattern is
# expected to match nothing.
def test_named_from():
    sparql = (
        "SELECT ?s ?p ?o FROM <urn:g1> "
        "WHERE {graph ?g {?s ?p ?o}} ORDER BY ?s"
    )
    rows = list(graph.query(sparql))
    assert rows == [], "no result expected"


# Explicit named graphs with an inclusive dataset: FROM NAMED <urn:g1> is
# expected to make urn:g1 the only graph matched by a GRAPH pattern.
def test_named_from_named_1():
    sparql = (
        "SELECT ?g ?s ?p ?o FROM NAMED <urn:g1> "
        "WHERE {graph ?g {?s ?p ?o}}"
    )
    expected = [
        (
            URIRef("urn:g1"),
            URIRef("urn:s1"),
            URIRef("urn:p1"),
            URIRef("urn:o1"),
        )
    ]
    rows = list(graph.query(sparql))
    assert rows == expected


# Several FROM NAMED clauses are expected to all be part of the named graphs.
def test_named_from_named_2():
    expected = [
        (
            URIRef(f"urn:g{i}"),
            URIRef(f"urn:s{i}"),
            URIRef(f"urn:p{i}"),
            URIRef(f"urn:o{i}"),
        )
        for i in (1, 2)
    ]
    sparql = """
SELECT ?g ?s ?p ?o
FROM NAMED <urn:g1>
FROM NAMED <urn:g2>
WHERE {
graph ?g {?s ?p ?o}
} ORDER BY ?g
"""
    rows = list(graph.query(sparql))
    assert rows == expected

# A FROM NAMED clause makes the RDF dataset explicit, so without any FROM
# clause the default graph is considered empty and a plain triple pattern is
# expected to match nothing.
def test_default_from_named():
    sparql = "SELECT ?g ?s ?p ?o FROM NAMED <urn:g1> WHERE {?s ?p ?o}"
    rows = list(graph.query(sparql))
    assert rows == [], "no result expected"


# FROM and FROM NAMED can be combined in one query: urn:g1 feeds the default
# graph (no ?g binding) while urn:g2 is reachable through the GRAPH pattern.
def test_from_and_from_named():
    default_graph_row = (
        None,
        URIRef("urn:s1"),
        URIRef("urn:p1"),
        URIRef("urn:o1"),
    )
    named_graph_row = (
        URIRef("urn:g2"),
        URIRef("urn:s2"),
        URIRef("urn:p2"),
        URIRef("urn:o2"),
    )
    sparql = """
SELECT ?g ?s ?p ?o
FROM <urn:g1>
FROM NAMED <urn:g2>
WHERE {
{?s ?p ?o}
UNION {graph ?g {?s ?p ?o}}
} ORDER BY ?s
"""
    rows = list(graph.query(sparql))
    assert rows == [default_graph_row, named_graph_row]

0 comments on commit d6858e0

Please sign in to comment.