diff --git a/src/engine/CheckUsePatternTrick.cpp b/src/engine/CheckUsePatternTrick.cpp index e7da58ea1..b4c9a7644 100644 --- a/src/engine/CheckUsePatternTrick.cpp +++ b/src/engine/CheckUsePatternTrick.cpp @@ -82,13 +82,148 @@ bool isVariableContainedInGraphPatternOperation( }); } +using ValuesClause = std::optional; +// TODO How many possible return values do we need here? +bool addValuesClauseToPattern(parsedQuery::GraphPatternOperation& operation, + const ValuesClause& clause); + +// __________________________________________________________________________ +void addValuesClause(ParsedQuery::GraphPattern& graphPattern, + const ValuesClause& values, bool recurse) { + // TODO Do we want to do this, or do we only want this if the values + // clause hasn't been handled downstream? + /* + bool containedInFilter = ql::ranges::any_of( + graphPattern._filters, [&values](const SparqlFilter& filter) { + return ql::ranges::any_of( + values._inlineValues._variables, [&filter](const Variable& var) { + return filter.expression_.isVariableContained(var); + }); + }); + */ + [[maybe_unused]] const bool containedInFilter = false; + auto check = [&](parsedQuery::GraphPatternOperation& op) { + return addValuesClauseToPattern(op, values); + }; + // TODO We have to figure out the correct positioning of the values + // clause, s.t. we don't get cartesian products because of optimization + // barriers like bind/Optional/Minus etc. 
+ std::optional insertPosition; + if (values.has_value()) { + for (const auto& [i, pattern] : + ::ranges::views::enumerate(graphPattern._graphPatterns)) { + if (check(pattern)) { + insertPosition = i; + } + } + } + + if (!recurse) { + return; + } + if (insertPosition.has_value()) { + graphPattern._graphPatterns.insert( + graphPattern._graphPatterns.begin() + insertPosition.value(), + values.value()); + } + + std::vector foundClauses; + for (const auto& pattern : graphPattern._graphPatterns) { + if (auto* foundValues = std::get_if(&pattern)) { + foundClauses.push_back(*foundValues); + } + } + for (const auto& foundValue : foundClauses) { + addValuesClause(graphPattern, foundValue, false); + } + + if (foundClauses.empty()) { + for (auto& pattern : graphPattern._graphPatterns) { + addValuesClauseToPattern(pattern, std::nullopt); + } + } +} + +// __________________________________________________________________________ +bool addValuesClauseToPattern(parsedQuery::GraphPatternOperation& operation, + const ValuesClause& result) { + auto check = [&](parsedQuery::GraphPattern& pattern) { + addValuesClause(pattern, result); + return false; + }; + const std::vector emptyVars{}; + const auto& variables = + result.has_value() ? result.value()._inlineValues._variables : emptyVars; + auto anyVar = [&](auto f) { return ql::ranges::any_of(variables, f); }; + return operation.visit([&](auto&& arg) -> bool { + using T = std::decay_t; + if constexpr (std::is_same_v || + std::is_same_v || + std::is_same_v) { + return check(arg._child); + } else if constexpr (std::is_same_v) { + check(arg._child1); + check(arg._child2); + return false; + } else if constexpr (std::is_same_v) { + // Subqueries are always SELECT queries. 
+ const auto& selectClause = arg.get().selectClause(); + + if (anyVar([&selectClause](const auto& var) { + return ad_utility::contains(selectClause.getSelectedVariables(), + var); + })) { + return check(arg.get()._rootGraphPattern); + } else { + // Also recurse into the subquery, but not with the given `VALUES` + // clause. + addValuesClause(arg.get()._rootGraphPattern, std::nullopt); + return false; + } + } else if constexpr (std::is_same_v) { + return ql::ranges::any_of(variables, [&](const auto& variable) { + return ad_utility::contains(arg.containedVariables(), variable); + }); + } else if constexpr (std::is_same_v) { + return ad_utility::contains_if( + arg._triples, [&](const SparqlTriple& triple) { + return anyVar([&](const auto& variable) { + return (triple.s_ == variable || + // Complex property paths are not allowed to contain + // variables in SPARQL, so this check is sufficient. + // TODO Still make the interface of the + // `PropertyPath` class typesafe. + triple.p_.asString() == variable.name() || + triple.o_ == variable); + }); + }); + } else if constexpr (std::is_same_v) { + return anyVar([&](const auto& variable) { + return ad_utility::contains(arg._inlineValues._variables, variable); + }); + } else if constexpr (std::is_same_v) { + return anyVar([&](const auto& variable) { + return ad_utility::contains(arg.visibleVariables_, variable); + }); + } else { + static_assert( + std::is_same_v || std::is_same_v || + std::is_same_v || std::is_same_v); + // TODO This is just an optimization, so we can always just omit + // it, but it would be nice to also apply this optimization for those + // types of queries. + return false; + } + }); +} + // Internal helper function. -// Modify the `triples` s.t. the patterns for `subAndPred.subject_` will appear -// in a column with the variable `subAndPred.predicate_` when evaluating and -// joining all the triples. 
 This can be either done by retrieving one of the -// additional columns where the patterns are stored in the PSO and POS -// permutation or, if no triple suitable for adding this column exists, by -// adding a triple `?subject ql:has-pattern ?predicate`. +// Modify the `triples` s.t. the patterns for `subAndPred.subject_` will +// appear in a column with the variable `subAndPred.predicate_` when +// evaluating and joining all the triples. This can be either done by +// retrieving one of the additional columns where the patterns are stored in +// the PSO and POS permutation or, if no triple suitable for adding this +// column exists, by adding a triple `?subject ql:has-pattern ?predicate`. static void rewriteTriplesForPatternTrick(const PatternTrickTuple& subAndPred, std::vector& triples) { // The following lambda tries to find a triple in the `triples` that has the @@ -96,8 +231,8 @@ static void rewriteTriplesForPatternTrick(const PatternTrickTuple& subAndPred, // either the subject or the object) and a fixed predicate (no variable). If // such a triple is found, it is modified s.t. it also scans the // `additionalScanColumn` which has to be the index of the column where the - // patterns of the `triplePosition` are stored in the POS and PSO permutation. - // Return true iff such a triple was found and replaced. + // patterns of the `triplePosition` are stored in the POS and PSO + // permutation. Return true iff such a triple was found and replaced. auto findAndRewriteMatchingTriple = [&subAndPred, &triples]( auto triplePosition, size_t additionalScanColumn) { @@ -133,8 +268,9 @@ static void rewriteTriplesForPatternTrick(const PatternTrickTuple& subAndPred, // Check if any of the triples in the `graphPattern` has the form `?s // ql:has-predicate ?p` or `?s ?p ?o` and that the other conditions for the // pattern trick are fulfilled (nameley that the variables `?p` and if present -// `?o` don't appear elsewhere in the `parsedQuery`. 
If such a triple is found, -// the query is modified such that it behaves as if the triple was replace by +// `?o` don't appear elsewhere in the `parsedQuery`. If such a triple is +// found, the query is modified such that it behaves as if the triple was +// replaced by // `?s ql:has-pattern ?p`. See the documentation of // `rewriteTriplesForPatternTrick` above. static std::optional findPatternTrickTuple( @@ -183,8 +319,8 @@ std::optional checkUsePatternTrick( } // We currently accept the pattern trick triple anywhere in the query. - // TODO This loop can be made much easier using ranges and view once - // they are supported by clang. + // TODO This loop can be made much easier using ranges and view + // once they are supported by clang. for (auto& pattern : parsedQuery->children()) { auto* curPattern = std::get_if(&pattern); if (!curPattern) { diff --git a/src/engine/CheckUsePatternTrick.h b/src/engine/CheckUsePatternTrick.h index 47db39963..52ddfc292 100644 --- a/src/engine/CheckUsePatternTrick.h +++ b/src/engine/CheckUsePatternTrick.h @@ -57,4 +57,10 @@ bool isVariableContainedInGraphPatternOperation( const parsedQuery::GraphPatternOperation& operation, const SparqlTriple* tripleToIgnore); +// Copy `VALUES` clauses into nested patterns that use their variables. +void addValuesClause( + ParsedQuery::GraphPattern& graphPattern, + const std::optional& values = std::nullopt, + bool recurse = true); + } // namespace checkUsePatternTrick diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index 9dd6b5599..d03000abf 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -205,9 +205,15 @@ std::vector QueryPlanner::createExecutionTrees( } // _____________________________________________________________________________ -QueryExecutionTree QueryPlanner::createExecutionTree(ParsedQuery& pq, +QueryExecutionTree QueryPlanner::createExecutionTree(ParsedQuery& pqIn, bool isSubquery) { try { + ParsedQuery copy; + if (!isSubquery) { 
+ copy = pqIn; + checkUsePatternTrick::addValuesClause(copy._rootGraphPattern); + } + auto& pq = isSubquery ? pqIn : copy; auto lastRow = createExecutionTrees(pq, isSubquery); auto minInd = findCheapestExecutionTree(lastRow); LOG(DEBUG) << "Done creating execution plan" << std::endl;