Avoid reading and decompressing index blocks during query planning (#…

…1725) Avoid reading and decompressing the first and last block of an index scan during query planning (which requires only a size estimate and not the exact size). This added significant performance overhead, when only few blocks were eventually read in the index scan. Instead, we now do the following: First check whether the first and last block fully match the scan specification (this is fast). Second, if this is not the case, assume that the number of elements is a fixed fraction of total number of elements in the block (which we know from the metadata). The fixed fraction is defined by the new runtime parameter `small-index-scan-size-estimate-divisor`.
ad-freiburg · Jan 23, 2025 · b10d0b7 · b10d0b7
1 parent 6f6e0c4
commit b10d0b7
Show file tree

Hide file tree

Showing 4 changed files with 81 additions and 25 deletions.
diff --git a/src/global/RuntimeParameters.h b/src/global/RuntimeParameters.h
@@ -53,7 +53,12 @@ inline auto& RuntimeParameters() {
         Bool<"throw-on-unbound-variables">{false},
         // Control up until which size lazy results should be cached. Caching
         // does cause significant overhead for this case.
-        MemorySizeParameter<"lazy-result-max-cache-size">{5_MB}};
+        MemorySizeParameter<"lazy-result-max-cache-size">{5_MB},
+        // When the result of an index scan is smaller than a single block, then
+        // its size estimate will be the size of the block divided by this
+        // value.
+        SizeT<"small-index-scan-size-estimate-divisor">{5},
+    };
   }();
   return params;
 }

diff --git a/src/index/CompressedRelation.cpp b/src/index/CompressedRelation.cpp
@@ -29,6 +29,35 @@ static auto getBeginAndEnd(auto& range) {
   return std::pair{ql::ranges::begin(range), ql::ranges::end(range)};
 }
 
+// Return true iff the `triple` is contained in the `scanSpec`. For example, the
+// triple ` 42 0 3 ` is contained in the specs `U U U`, `42 U U` and `42 0 U` ,
+// but not in `42 2 U` where `U` means "scan for all possible values".
+static auto isTripleInSpecification =
+    [](const ScanSpecification& scanSpec,
+       const CompressedBlockMetadata::PermutedTriple& triple) {
+      enum struct M { GuaranteedMatch, Mismatch, MustCheckNextElement };
+      auto checkElement = [](const auto& optId, Id id) {
+        if (!optId.has_value()) {
+          return M::GuaranteedMatch;
+        } else if (optId.value() != id) {
+          return M::Mismatch;
+        } else {
+          return M::MustCheckNextElement;
+        }
+      };
+      auto result = checkElement(scanSpec.col0Id(), triple.col0Id_);
+      if (result == M::MustCheckNextElement) {
+        result = checkElement(scanSpec.col1Id(), triple.col1Id_);
+      }
+      if (result == M::MustCheckNextElement) {
+        result = checkElement(scanSpec.col2Id(), triple.col2Id_);
+      }
+      // The case `result == M::MustCheckNextElement` can happen in the unlikely
+      // case that there only is a single triple in the block, which is scanned
+      // for explicitly.
+      return result != M::Mismatch;
+    };
+
 // modify the `block` according to the `limitOffset`. Also modify the
 // `limitOffset` to reflect the parts of the LIMIT and OFFSET that have been
 // performed by pruning this `block`.
@@ -631,32 +660,53 @@ std::pair<size_t, size_t> CompressedRelationReader::getResultSizeImpl(
   // a part of these blocks is actually part of the result,
   // set up a lambda which allows us to read these blocks, and returns
   // the size of the result.
+  size_t numResults = 0;
+  // Determine the total size of the result.
+  // First accumulate the complete blocks in the "middle"
+  std::size_t inserted = 0;
+  std::size_t deleted = 0;
+
   auto readSizeOfPossiblyIncompleteBlock = [&](const auto& block) {
-    return readPossiblyIncompleteBlock(scanSpec, config, block, std::nullopt,
-                                       locatedTriplesPerBlock)
-        .numRows();
+    if (exactSize) {
+      numResults +=
+          readPossiblyIncompleteBlock(scanSpec, config, block, std::nullopt,
+                                      locatedTriplesPerBlock)
+              .numRows();
+    } else {
+      // If the first and last triple of the block match, then we know that the
+      // whole block belongs to the result.
+      bool isComplete = isTripleInSpecification(scanSpec, block.firstTriple_) &&
+                        isTripleInSpecification(scanSpec, block.lastTriple_);
+      size_t divisor =
+          isComplete ? 1
+                     : RuntimeParameters()
+                           .get<"small-index-scan-size-estimate-divisor">();
+      const auto [ins, del] =
+          locatedTriplesPerBlock.numTriples(block.blockIndex_);
+      auto trunc = [divisor](size_t num) {
+        return std::max(std::min(num, 1ul), num / divisor);
+      };
+      inserted += trunc(ins);
+      deleted += trunc(del);
+      numResults += trunc(block.numRows_);
+    }
   };
 
-  size_t numResults = 0;
   // The first and the last block might be incomplete, compute
   // and store the partial results from them.
   if (beginBlock < endBlock) {
-    numResults += readSizeOfPossiblyIncompleteBlock(*beginBlock);
+    readSizeOfPossiblyIncompleteBlock(*beginBlock);
     ++beginBlock;
   }
   if (beginBlock < endBlock) {
-    numResults += readSizeOfPossiblyIncompleteBlock(*(endBlock - 1));
+    readSizeOfPossiblyIncompleteBlock(*(endBlock - 1));
     --endBlock;
   }
 
   if (beginBlock == endBlock) {
     return {numResults, numResults};
   }
 
-  // Determine the total size of the result.
-  // First accumulate the complete blocks in the "middle"
-  std::size_t inserted = 0;
-  std::size_t deleted = 0;
   ql::ranges::for_each(
       ql::ranges::subrange{beginBlock, endBlock}, [&](const auto& block) {
         const auto [ins, del] =
@@ -666,8 +716,8 @@ std::pair<size_t, size_t> CompressedRelationReader::getResultSizeImpl(
           deleted += del;
           numResults += block.numRows_;
         } else {
-          // TODO<joka921> We could cache the exact size as soon as we have
-          // merged the block once since the last update.
+          // TODO<joka921> We could cache the exact size as soon as we
+          // have merged the block once since the last update.
           auto b = readAndDecompressBlock(block, config);
           numResults += b.has_value() ? b.value().block_.numRows() : 0u;
         }
@@ -1366,10 +1416,10 @@ auto CompressedRelationWriter::createPermutationPair(
         // relation as its overhead is far too high for small relations.
         relation.swapColumns(c1Idx, c2Idx);
 
-        // We only need to sort by the columns of the triple + the graph column,
-        // not the additional payload. Note: We could also use
-        // `compareWithoutLocalVocab` to compare the IDs cheaper, but this sort
-        // is far from being a performance bottleneck.
+        // We only need to sort by the columns of the triple + the graph
+        // column, not the additional payload. Note: We could also use
+        // `compareWithoutLocalVocab` to compare the IDs cheaper, but this
+        // sort is far from being a performance bottleneck.
         auto compare = [](const auto& a, const auto& b) {
           return std::tie(a[0], a[1], a[2], a[3]) <
                  std::tie(b[0], b[1], b[2], b[3]);

diff --git a/test/engine/IndexScanTest.cpp b/test/engine/IndexScanTest.cpp
@@ -498,7 +498,8 @@ TEST(IndexScan, getResultSizeOfScan) {
     SparqlTripleSimple scanTriple{I::fromIriref("<x2>"), I::fromIriref("<p>"),
                                   I::fromIriref("<s1>")};
     IndexScan scan{qec, Permutation::Enum::POS, scanTriple};
-    EXPECT_EQ(scan.getSizeEstimate(), 0);
+    EXPECT_EQ(scan.getSizeEstimate(), 1);
+    EXPECT_EQ(scan.getExactSize(), 0);
   }
   {
     SparqlTripleSimple scanTriple{I::fromIriref("<x>"), I::fromIriref("<p>"),

diff --git a/test/engine/SpatialJoinTest.cpp b/test/engine/SpatialJoinTest.cpp
@@ -926,22 +926,22 @@ class SpatialJoinMultiplicityAndSizeEstimateTest
         spatialJoin = static_cast<SpatialJoin*>(spJoin2.get());
         auto varColsMap = spatialJoin->getExternallyVisibleVariableColumns();
 
-        assertMultiplicity(subj1.getVariable(), 9.8, spatialJoin, varColsMap);
-        assertMultiplicity(obj1.getVariable(), 7.0, spatialJoin, varColsMap);
-        assertMultiplicity(subj2.getVariable(), 9.8, spatialJoin, varColsMap);
-        assertMultiplicity(obj2.getVariable(), 7.0, spatialJoin, varColsMap);
+        assertMultiplicity(subj1.getVariable(), 4.2, spatialJoin, varColsMap);
+        assertMultiplicity(obj1.getVariable(), 3.0, spatialJoin, varColsMap);
+        assertMultiplicity(subj2.getVariable(), 4.2, spatialJoin, varColsMap);
+        assertMultiplicity(obj2.getVariable(), 3.0, spatialJoin, varColsMap);
         ASSERT_TRUE(
             spatialJoin->onlyForTestingGetDistanceVariable().has_value());
         assertMultiplicity(Variable{"?distanceForTesting"}, 1, spatialJoin,
                            varColsMap);
       } else {
-        ASSERT_EQ(leftChild->getSizeEstimate(), 7);
-        ASSERT_EQ(rightChild->getSizeEstimate(), 7);
+        auto leftEstimate = leftChild->getSizeEstimate();
+        auto rightEstimate = rightChild->getSizeEstimate();
         auto spJoin1 = spatialJoin->addChild(firstChild, firstVariable);
         spatialJoin = static_cast<SpatialJoin*>(spJoin1.get());
         auto spJoin2 = spatialJoin->addChild(secondChild, secondVariable);
         spatialJoin = static_cast<SpatialJoin*>(spJoin2.get());
-        ASSERT_LE(spatialJoin->getSizeEstimate(), 49);
+        ASSERT_LE(spatialJoin->getSizeEstimate(), leftEstimate * rightEstimate);
       }
     }
   }