Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Push down VALUES clauses into the execution tree. #1706

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
160 changes: 148 additions & 12 deletions src/engine/CheckUsePatternTrick.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,22 +82,157 @@
});
}

// A `VALUES` clause that may be absent. `std::nullopt` means that there is
// currently no clause to push down.
using ValuesClause = std::optional<parsedQuery::Values>;
// Forward declaration, needed because `addValuesClause` and
// `addValuesClauseToPattern` (both defined below) call each other.
// TODO<joka921> How many possible return values do we need here?
bool addValuesClauseToPattern(parsedQuery::GraphPatternOperation& operation,
const ValuesClause& clause);

// __________________________________________________________________________
// Push `VALUES` clauses down into the children of `graphPattern`, which is
// modified in place.
//
// 1. If `values` holds a clause, it is offered to every child operation via
//    `addValuesClauseToPattern`, and the index of the LAST child for which
//    that call returns `true` is remembered.
// 2. If `recurse` is `false`, the function returns at this point.
// 3. Otherwise a copy of the clause is inserted directly in front of the
//    remembered child (if there is one).
// 4. Finally, every `VALUES` clause that is a direct child of `graphPattern`
//    is offered to all children (via a non-recursing call to this function).
//    If there is no such clause, the children are visited with an empty
//    clause, so that nested graph patterns are processed nevertheless.
void addValuesClause(ParsedQuery::GraphPattern& graphPattern,
                     const ValuesClause& values, bool recurse) {
  auto& children = graphPattern._graphPatterns;

  // TODO<joka921> Do we want to do this, or do we only want this if the values
  // clause hasn't been handled downstream. (A check whether one of the
  // variables of the clause is contained in one of the `_filters` of the
  // `graphPattern` is currently disabled.)

  // TODO<joka921> We have to figure out the correct positioning of the values
  // clause, s.t. we don't get cartesian products because of optimization
  // barriers like bind/Optional/Minus etc.
  std::optional<size_t> lastMatchingChild;
  if (values.has_value()) {
    // Note: This loop only modifies the children themselves, never the
    // container `children`, so the indices remain stable.
    for (size_t idx = 0; idx < children.size(); ++idx) {
      if (addValuesClauseToPattern(children[idx], values)) {
        lastMatchingChild = idx;
      }
    }
  }

  if (!recurse) {
    return;
  }

  if (lastMatchingChild.has_value()) {
    children.insert(children.begin() + *lastMatchingChild, *values);
  }

  // Copy all the `VALUES` clauses that are direct children (this includes a
  // clause that was just inserted above).
  std::vector<ValuesClause> directClauses;
  for (const auto& child : children) {
    if (const auto* valuesChild = std::get_if<parsedQuery::Values>(&child)) {
      directClauses.push_back(*valuesChild);
    }
  }

  if (directClauses.empty()) {
    // Nothing to push down from this level, but still visit the children.
    for (auto& child : children) {
      addValuesClauseToPattern(child, std::nullopt);
    }
    return;
  }

  // `recurse == false`: only offer the clause to the children, don't insert
  // it into `graphPattern` again.
  for (const auto& clause : directClauses) {
    addValuesClause(graphPattern, clause, false);
  }
}

// __________________________________________________________________________
// Offer the (possibly absent) `VALUES` clause `result` to a single
// `operation`.
//
// * `Optional`, `GroupGraphPattern`, `Minus`, `Union`: The clause is passed on
//   to the nested graph pattern(s) via `addValuesClause`. Returns `false`.
// * `Subquery`: If at least one variable of the clause is selected by the
//   subquery, the clause is passed on to the root graph pattern of the
//   subquery, otherwise the subquery is visited with an empty clause.
//   Returns `false`.
// * `Bind`, `BasicGraphPattern`, `Values`, `Service`: Returns `true` iff at
//   least one variable of the clause also appears in the operation. In
//   particular, the result is `false` if `result` is `std::nullopt`.
// * All other operations: Returns `false`.
bool addValuesClauseToPattern(parsedQuery::GraphPatternOperation& operation,
                              const ValuesClause& result) {
  // Without a clause there are no variables to look for.
  const std::vector<Variable> noVariables{};
  const auto& clauseVariables =
      result.has_value() ? result.value()._inlineValues._variables
                         : noVariables;

  // Return true iff `predicate` holds for at least one variable of the clause.
  auto matchesAnyVariable = [&clauseVariables](auto predicate) {
    return ql::ranges::any_of(clauseVariables, predicate);
  };

  // Pass the clause on to a nested graph pattern.
  auto pushInto = [&result](parsedQuery::GraphPattern& nested) {
    addValuesClause(nested, result);
  };

  return operation.visit([&](auto&& op) -> bool {
    using Op = std::decay_t<decltype(op)>;
    constexpr bool hasSingleChild = std::is_same_v<Op, p::Optional> ||
                                    std::is_same_v<Op, p::GroupGraphPattern> ||
                                    std::is_same_v<Op, p::Minus>;
    if constexpr (hasSingleChild) {
      pushInto(op._child);
      return false;
    } else if constexpr (std::is_same_v<Op, p::Union>) {
      pushInto(op._child1);
      pushInto(op._child2);
      return false;
    } else if constexpr (std::is_same_v<Op, p::Subquery>) {
      // Subqueries always are SELECT clauses.
      auto&& subquery = op.get();
      const auto& select = subquery.selectClause();
      const bool clauseVariableIsSelected =
          matchesAnyVariable([&select](const auto& var) {
            return ad_utility::contains(select.getSelectedVariables(), var);
          });
      if (clauseVariableIsSelected) {
        pushInto(subquery._rootGraphPattern);
      } else {
        // Also recurse into the subquery, but not with the given `VALUES`
        // clause.
        addValuesClause(subquery._rootGraphPattern, std::nullopt);
      }
      return false;
    } else if constexpr (std::is_same_v<Op, p::Bind>) {
      return matchesAnyVariable([&op](const auto& var) {
        return ad_utility::contains(op.containedVariables(), var);
      });
    } else if constexpr (std::is_same_v<Op, p::BasicGraphPattern>) {
      auto tripleContainsClauseVariable = [&](const SparqlTriple& triple) {
        return matchesAnyVariable([&triple](const auto& var) {
          // Complex property paths are not allowed to contain variables in
          // SPARQL, so comparing the string representation of the predicate
          // is sufficient.
          // TODO<joka921> Still make the interface of the `PropertyPath`
          // class typesafe.
          return triple.s_ == var || triple.p_.asString() == var.name() ||
                 triple.o_ == var;
        });
      };
      return ad_utility::contains_if(op._triples,
                                     tripleContainsClauseVariable);
    } else if constexpr (std::is_same_v<Op, p::Values>) {
      return matchesAnyVariable([&op](const auto& var) {
        return ad_utility::contains(op._inlineValues._variables, var);
      });
    } else if constexpr (std::is_same_v<Op, p::Service>) {
      return matchesAnyVariable([&op](const auto& var) {
        return ad_utility::contains(op.visibleVariables_, var);
      });
    } else {
      static_assert(std::is_same_v<Op, p::TransPath> ||
                    std::is_same_v<Op, p::PathQuery> ||
                    std::is_same_v<Op, p::Describe> ||
                    std::is_same_v<Op, p::SpatialQuery>);
      // TODO<joka921> This is just an optimization, so we can always just omit
      // it, but it would be nice to also apply this optimization for those
      // types of queries.
      return false;
    }
  });
}

// Internal helper function.
// Modify the `triples` s.t. the patterns for `subAndPred.subject_` will appear
// in a column with the variable `subAndPred.predicate_` when evaluating and
// joining all the triples. This can be either done by retrieving one of the
// additional columns where the patterns are stored in the PSO and POS
// permutation or, if no triple suitable for adding this column exists, by
// adding a triple `?subject ql:has-pattern ?predicate`.
// Modify the `triples` s.t. the patterns for `subAndPred.subject_` will
// appear in a column with the variable `subAndPred.predicate_` when
// evaluating and joining all the triples. This can be either done by
// retrieving one of the additional columns where the patterns are stored in
// the PSO and POS permutation or, if no triple suitable for adding this
// column exists, by adding a triple `?subject ql:has-pattern ?predicate`.
static void rewriteTriplesForPatternTrick(const PatternTrickTuple& subAndPred,
std::vector<SparqlTriple>& triples) {
// The following lambda tries to find a triple in the `triples` that has the
// subject variable of the pattern trick in its `triplePosition` (which is
// either the subject or the object) and a fixed predicate (no variable). If
// such a triple is found, it is modified s.t. it also scans the
// `additionalScanColumn` which has to be the index of the column where the
// patterns of the `triplePosition` are stored in the POS and PSO permutation.
// Return true iff such a triple was found and replaced.
// patterns of the `triplePosition` are stored in the POS and PSO
// permutation. Return true iff such a triple was found and replaced.
auto findAndRewriteMatchingTriple = [&subAndPred, &triples](
auto triplePosition,
size_t additionalScanColumn) {
Expand Down Expand Up @@ -133,8 +268,9 @@
// Check if any of the triples in the `graphPattern` has the form `?s
// ql:has-predicate ?p` or `?s ?p ?o` and that the other conditions for the
// pattern trick are fulfilled (namely that the variables `?p` and if present
// `?o` don't appear elsewhere in the `parsedQuery`). If such a triple is found,
// the query is modified such that it behaves as if the triple was replaced by
// `?o` don't appear elsewhere in the `parsedQuery`). If such a triple is
// found, the query is modified such that it behaves as if the triple was
// replaced by
// `?s ql:has-pattern ?p`. See the documentation of
// `rewriteTriplesForPatternTrick` above.
static std::optional<PatternTrickTuple> findPatternTrickTuple(
Expand Down Expand Up @@ -183,8 +319,8 @@
}

// We currently accept the pattern trick triple anywhere in the query.
// TODO<joka921> This loop can be made much easier using ranges and view once
// they are supported by clang.
// TODO<joka921> This loop can be made much easier using ranges and view
// once they are supported by clang.
for (auto& pattern : parsedQuery->children()) {
auto* curPattern = std::get_if<p::BasicGraphPattern>(&pattern);
if (!curPattern) {
Expand Down
6 changes: 6 additions & 0 deletions src/engine/CheckUsePatternTrick.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,4 +57,10 @@ bool isVariableContainedInGraphPatternOperation(
const parsedQuery::GraphPatternOperation& operation,
const SparqlTriple* tripleToIgnore);

// __________________________________________________________________________
// Push `VALUES` clauses down into the nested graph patterns of `graphPattern`,
// which is modified in place. `values` is an optional additional clause (for
// example from an enclosing graph pattern) that is offered to the children of
// `graphPattern`; with the default `std::nullopt`, the function starts with
// the `VALUES` clauses that are direct children of `graphPattern`. If
// `recurse` is `false`, `values` is only offered to the children, and the
// function then returns without inserting `values` into `graphPattern` and
// without processing the `VALUES` clauses contained in `graphPattern`.
void addValuesClause(
ParsedQuery::GraphPattern& graphPattern,
const std::optional<parsedQuery::Values>& values = std::nullopt,
bool recurse = true);

} // namespace checkUsePatternTrick
8 changes: 7 additions & 1 deletion src/engine/QueryPlanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -205,9 +205,15 @@ std::vector<QueryPlanner::SubtreePlan> QueryPlanner::createExecutionTrees(
}

// _____________________________________________________________________________
QueryExecutionTree QueryPlanner::createExecutionTree(ParsedQuery& pq,
QueryExecutionTree QueryPlanner::createExecutionTree(ParsedQuery& pqIn,
bool isSubquery) {
try {
ParsedQuery copy;
if (!isSubquery) {
copy = pqIn;
checkUsePatternTrick::addValuesClause(copy._rootGraphPattern);
}
auto& pq = isSubquery ? pqIn : copy;
auto lastRow = createExecutionTrees(pq, isSubquery);
auto minInd = findCheapestExecutionTree(lastRow);
LOG(DEBUG) << "Done creating execution plan" << std::endl;
Expand Down
Loading