Skip to content

Commit

Permalink
First working implementation of SERVICE
Browse files Browse the repository at this point in the history
Basic principle: Send the query inside the SERVICE clause to the remote
endpoint and turn the result into a VALUES clause, which is then
processed using (an improved version of) existing code.

1. Update `parsedQuery::Values` to hold a table of `TripleComponent`s
instead of `std::string`s as before. Modify the `SparqlQleverVisitor`
accordingly.

2. Update the `Values : Operation` class to add OOV (out-of-vocabulary) entries to the
`LocalVocab` (any row containing an OOV entry was ignored so far).
Update the JOIN operation to propagate the localVocab if only one of the
operands has one (which is a frequent use case).

3. Add functionality to send a POST request to a remote SPARQL endpoint,
receive the result synchronously, and write it to a (potentially very
large) string. Uses Boost.Beast.
  • Loading branch information
Hannah Bast committed Nov 8, 2022
1 parent d9a3e65 commit 690aa00
Show file tree
Hide file tree
Showing 29 changed files with 585 additions and 101 deletions.
6 changes: 6 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,12 @@ find_package(Boost 1.74 COMPONENTS iostreams program_options REQUIRED)
include_directories(${Boost_INCLUDE_DIR})


######################################
# SSL
######################################
find_package(OpenSSL REQUIRED)


set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
# set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
Expand Down
4 changes: 2 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ ENV DEBIAN_FRONTEND=noninteractive

FROM base as builder
RUN apt-get update && apt-get install -y build-essential cmake libicu-dev tzdata pkg-config uuid-runtime uuid-dev git
RUN apt install -y libjemalloc-dev ninja-build libzstd-dev
RUN apt install -y libjemalloc-dev ninja-build libzstd-dev libssl-dev
RUN apt install -y libboost1.74-dev libboost-program-options1.74-dev libboost-iostreams1.74-dev

COPY . /app/
Expand All @@ -23,7 +23,7 @@ FROM base as runtime
WORKDIR /app
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install -y wget python3-yaml unzip curl bzip2 pkg-config libicu-dev python3-icu libgomp1 uuid-runtime make
RUN apt install -y lbzip2 libjemalloc-dev libzstd-dev
RUN apt install -y lbzip2 libjemalloc-dev libzstd-dev libssl-dev
RUN apt install -y libboost1.74-dev libboost-program-options1.74-dev libboost-iostreams1.74-dev

ARG UID=1000
Expand Down
4 changes: 2 additions & 2 deletions Dockerfiles/Dockerfile.Ubuntu18.04
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ RUN apt-get update

RUN apt-get install -y build-essential cmake libicu-dev tzdata pkg-config uuid-runtime uuid-dev git
RUN apt install -y gcc-11 g++-11
RUN apt install -y libjemalloc-dev ninja-build libzstd-dev
RUN apt install -y libjemalloc-dev ninja-build libzstd-dev libssl-dev
RUN apt install -y libboost1.74-dev

COPY . /app/
Expand All @@ -35,7 +35,7 @@ FROM base as runtime
WORKDIR /app
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install -y wget python3-yaml unzip curl bzip2 pkg-config libicu-dev python3-icu libgomp1 uuid-runtime
RUN apt install -y lbzip2 libjemalloc-dev libzstd-dev
RUN apt install -y lbzip2 libjemalloc-dev libzstd-dev libssl-dev

ARG UID=1000
RUN groupadd -r qlever && useradd --no-log-init -r -u $UID -g qlever qlever && chown qlever:qlever /app
Expand Down
4 changes: 2 additions & 2 deletions Dockerfiles/Dockerfile.Ubuntu20.04
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ RUN apt-get update
FROM base as builder
RUN apt-get install -y build-essential cmake libicu-dev tzdata pkg-config uuid-runtime uuid-dev git
RUN apt install -y gcc-11 g++-11
RUN apt install -y libjemalloc-dev ninja-build libzstd-dev
RUN apt install -y libjemalloc-dev ninja-build libzstd-dev libssl-dev
RUN apt install -y libboost1.74-dev

COPY . /app/
Expand All @@ -29,7 +29,7 @@ FROM base as runtime
WORKDIR /app
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install -y wget python3-yaml unzip curl bzip2 pkg-config libicu-dev python3-icu libgomp1 uuid-runtime
RUN apt install -y lbzip2 libjemalloc-dev libzstd-dev
RUN apt install -y lbzip2 libjemalloc-dev libzstd-dev libssl-dev

ARG UID=1000
RUN groupadd -r qlever && useradd --no-log-init -r -u $UID -g qlever qlever && chown qlever:qlever /app
Expand Down
2 changes: 1 addition & 1 deletion e2e/e2e.sh
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ if [ ${REBUILD_THE_INDEX} == "YES" ] || ! [ -f "${INDEX}.vocabulary" ]; then
rm -f "$INDEX.*"
pushd "$BINARY_DIR"
echo "Building index $INDEX"
./IndexBuilderMain -l -i "$INDEX" \
./IndexBuilderMain -i "$INDEX" \
-F ttl \
-f "$INPUT.nt" \
-s "$PROJECT_DIR/e2e/e2e-build-settings.json" \
Expand Down
8 changes: 5 additions & 3 deletions e2e/scientists_queries.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -850,14 +850,16 @@ queries:
type: no-text
sparql: |
SELECT ?a WHERE {
VALUES ?a {"obscure Literal"@xf <Albert_Einstein> <Non_Exisiting_Scientist>}
VALUES ?a { "obscure Literal"@xf <Albert_Einstein> <Non_Existing_Scientist> }
}
checks:
- num_cols: 1
- num_rows: 1
- num_rows: 3
- selected: ["?a"]
- contains_row: ["<Albert_Einstein>"]
- contains_warning: ["The word \"obscure Literal\"@xf", "The word <Non_Exisiting", "Ignored 2 rows"]
# - contains_row: ["\"obscure Literal\"@xf"]
- contains_row: ["<Non_Existing_Scientist>"]
# - contains_warning: ["The word \"obscure Literal\"@xf", "The word <Non_Exisiting", "Ignored 2 rows"]


- query: values-empty-join
Expand Down
3 changes: 2 additions & 1 deletion src/engine/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ add_library(engine
Union.cpp Union.h
MultiColumnJoin.cpp MultiColumnJoin.h
TransitivePath.cpp TransitivePath.h
Service.cpp Service.h
Values.cpp Values.h
Bind.cpp Bind.h
IdTable.h
Expand All @@ -39,4 +40,4 @@ add_library(engine
VariableToColumnMap.cpp)


target_link_libraries(engine index parser sparqlExpressions httpServer SortPerformanceEstimator absl::flat_hash_set ${ICU_LIBRARIES} boost_iostreams)
target_link_libraries(engine index parser sparqlExpressions httpServer SortPerformanceEstimator absl::flat_hash_set ${ICU_LIBRARIES} boost_iostreams OpenSSL::SSL OpenSSL::Crypto)
2 changes: 2 additions & 0 deletions src/engine/CheckUsePatternTrick.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ bool isVariableContainedInGraphPatternOperation(
} else if constexpr (std::is_same_v<T, p::Values>) {
return ad_utility::contains(arg._inlineValues._variables,
variable.name());
} else if constexpr (std::is_same_v<T, p::Service>) {
return check(arg.graphPattern_);
} else {
static_assert(std::is_same_v<T, p::TransPath>);
// The `TransPath` is set up later in the query planning, when this
Expand Down
15 changes: 15 additions & 0 deletions src/engine/Join.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,21 @@ void Join::computeResult(ResultTable* result) {
_leftJoinCol, rightRes->_idTable, _rightJoinCol,
&result->_idTable);

// If only one of the two operands has a local vocab, pass it on.
bool leftLocalVocabEmpty = leftRes->_localVocab->empty();
bool rightLocalVocabEmpty = rightRes->_localVocab->empty();
if (!leftLocalVocabEmpty || !rightLocalVocabEmpty) {
if (!leftLocalVocabEmpty && rightLocalVocabEmpty) {
result->_localVocab = std::move(leftRes->_localVocab);
} else if (leftLocalVocabEmpty && !rightLocalVocabEmpty) {
result->_localVocab = std::move(rightRes->_localVocab);
} else {
throw std::runtime_error(
"JOIN of two results, where both have a non-empty vocabulary, is "
"currently not supported");
}
}

LOG(DEBUG) << "Join result computation done." << endl;
}

Expand Down
4 changes: 4 additions & 0 deletions src/engine/QueryExecutionTree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "engine/Join.h"
#include "engine/NeutralElementOperation.h"
#include "engine/OrderBy.h"
#include "engine/Service.h"
#include "engine/Sort.h"
#include "engine/TextOperationWithFilter.h"
#include "engine/TransitivePath.h"
Expand Down Expand Up @@ -617,6 +618,8 @@ void QueryExecutionTree::setOperation(std::shared_ptr<Op> operation) {
_type = DISTINCT;
} else if constexpr (std::is_same_v<Op, Values>) {
_type = VALUES;
} else if constexpr (std::is_same_v<Op, Service>) {
_type = SERVICE;
} else if constexpr (std::is_same_v<Op, TransitivePath>) {
_type = TRANSITIVE_PATH;
} else if constexpr (std::is_same_v<Op, OrderBy>) {
Expand Down Expand Up @@ -648,6 +651,7 @@ template void QueryExecutionTree::setOperation(std::shared_ptr<Bind>);
template void QueryExecutionTree::setOperation(std::shared_ptr<Sort>);
template void QueryExecutionTree::setOperation(std::shared_ptr<Distinct>);
template void QueryExecutionTree::setOperation(std::shared_ptr<Values>);
template void QueryExecutionTree::setOperation(std::shared_ptr<Service>);
template void QueryExecutionTree::setOperation(std::shared_ptr<TransitivePath>);
template void QueryExecutionTree::setOperation(std::shared_ptr<OrderBy>);
template void QueryExecutionTree::setOperation(std::shared_ptr<GroupBy>);
Expand Down
1 change: 1 addition & 0 deletions src/engine/QueryExecutionTree.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ class QueryExecutionTree {
MULTICOLUMN_JOIN,
TRANSITIVE_PATH,
VALUES,
SERVICE,
BIND,
MINUS,
NEUTRAL_ELEMENT
Expand Down
5 changes: 4 additions & 1 deletion src/engine/QueryPlanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <engine/OptionalJoin.h>
#include <engine/OrderBy.h>
#include <engine/QueryPlanner.h>
#include <engine/Service.h>
#include <engine/Sort.h>
#include <engine/TextOperationWithFilter.h>
#include <engine/TextOperationWithoutFilter.h>
Expand Down Expand Up @@ -453,7 +454,9 @@ std::vector<QueryPlanner::SubtreePlan> QueryPlanner::optimize(
SubtreePlan valuesPlan =
makeSubtreePlan<Values>(_qec, arg._inlineValues);
joinCandidates(std::vector{std::move(valuesPlan)});

} else if constexpr (std::is_same_v<T, p::Service>) {
SubtreePlan servicePlan = makeSubtreePlan<Service>(_qec, arg);
joinCandidates(std::vector{std::move(servicePlan)});
} else if constexpr (std::is_same_v<T, p::Bind>) {
// The logic of the BIND operation is implemented in the joinCandidates
// lambda. Reason: BIND does not add a new join operation like for the
Expand Down
Loading

0 comments on commit 690aa00

Please sign in to comment.