From 943b51d1a47119840d6e461185f687b2aa7d1b29 Mon Sep 17 00:00:00 2001 From: Barson Date: Mon, 1 Aug 2022 23:46:51 +0800 Subject: [PATCH] pre-loading lazy vectors at common parent expr or in ExprSet::eval (#2073) --- velox/exec/FilterProject.cpp | 13 --------- velox/expression/EvalCtx.cpp | 5 ++++ velox/expression/Expr.cpp | 56 +++++++++++++++++++++++++++++------- velox/expression/Expr.h | 6 ++++ 4 files changed, 56 insertions(+), 24 deletions(-) diff --git a/velox/exec/FilterProject.cpp b/velox/exec/FilterProject.cpp index b876a96385194..106a000f5ee49 100644 --- a/velox/exec/FilterProject.cpp +++ b/velox/exec/FilterProject.cpp @@ -159,19 +159,6 @@ RowVectorPtr FilterProject::getOutput() { } void FilterProject::project(const SelectivityVector& rows, EvalCtx* evalCtx) { - // Make sure LazyVectors are loaded for all the "rows". - // - // Consider projection with 2 expressions: f(a) AND g(b), h(b) - // If b is a LazyVector and f(a) AND g(b) expression is evaluated first, it - // will load b only for rows where f(a) is true. However, h(b) projection - // needs all rows for "b". - // - // This works, but may load more rows than necessary. E.g. if we only have - // f(a) AND g(b) expression and b is not used anywhere else, it is sufficient - // to load b for a subset of rows where f(a) is true. - *evalCtx->mutableIsFinalSelection() = false; - *evalCtx->mutableFinalSelection() = &rows; - exprs_->eval( hasFilter_ ? 1 : 0, numExprs_, !hasFilter_, rows, evalCtx, &results_); } diff --git a/velox/expression/EvalCtx.cpp b/velox/expression/EvalCtx.cpp index 7b2da0a64552e..40fdf7c74ce4e 100644 --- a/velox/expression/EvalCtx.cpp +++ b/velox/expression/EvalCtx.cpp @@ -219,6 +219,11 @@ VectorPtr EvalCtx::ensureFieldLoaded( const SelectivityVector& rows) { auto field = getField(index); if (isLazyNotLoaded(*field)) { + // Keep using "finalSelection_" here: if ExprSet::eval is invoked with + // partial rows more than once, the LazyVector must be loaded for all of + // *finalSelection_. 
See the example usage in + // ExprEncodingsTest::run: ExprSet::eval is invoked with the first 2/3 of + // the rows, and then with the last 2/3 of the rows. const auto& rowsToLoad = isFinalSelection_ ? rows : *finalSelection_; LocalDecodedVector holder(*this); diff --git a/velox/expression/Expr.cpp b/velox/expression/Expr.cpp index e994a8bb0bfb5..97b5e8bdab248 100644 --- a/velox/expression/Expr.cpp +++ b/velox/expression/Expr.cpp @@ -102,6 +102,18 @@ bool hasConditionals(Expr* expr) { return false; } +void findMultiRefFields( + std::set& allFields, + std::set& multiRefFields, + const std::vector& moreFields) { + for (auto* newField : moreFields) { + if (allFields.find(newField) != allFields.end()) { + multiRefFields.insert(newField); + } + allFields.insert(newField); + } +} + } // namespace Expr::Expr( @@ -184,11 +196,15 @@ void Expr::computeMetadata() { propagatesNulls_ = vectorFunction_->isDefaultNullBehavior(); deterministic_ = vectorFunction_->isDeterministic(); } + + std::set allFields; for (auto& input : inputs_) { input->computeMetadata(); deterministic_ &= input->deterministic_; propagatesNulls_ &= input->propagatesNulls_; mergeFields(distinctFields_, input->distinctFields_); + // Find the fields referenced by multiple inputs. + findMultiRefFields(allFields, multiRefFields_, input->distinctFields_); } if (isSpecialForm()) { propagatesNulls_ = propagatesNulls(); @@ -361,17 +377,27 @@ void Expr::eval( // all the time. Therefore, we should delay loading lazy vectors until we // know the minimum subset of rows needed to be loaded. // + // Load fields referenced by multiple inputs unconditionally. It's hard to + // know the superset of rows those inputs need to load. + // // If there is only one field, load it unconditionally. The very first IF, // AND or OR will have to load it anyway. Pre-loading enables peeling of // encodings at a higher level in the expression tree and avoids repeated // peeling and wrapping in the sub-nodes. 
// - // TODO: Re-work the logic of deciding when to load which field. + // TODO: Pre-load only the lazy vectors that are not flat-encoded, + // regardless of hasConditionals_. if (!hasConditionals_ || distinctFields_.size() == 1) { // Load lazy vectors if any. for (const auto& field : distinctFields_) { context.ensureFieldLoaded(field->index(context), rows); } + } else { + // Load fields referenced by multiple inputs at this common parent expr + // with "rows". Delay loading fields that are not in multiRefFields_. + for (const auto& field : multiRefFields_) { + context.ensureFieldLoaded(field->index(context), rows); + } } if (inputs_.empty()) { @@ -784,8 +810,6 @@ void Expr::evalWithNulls( if (removeSureNulls(rows, context, nonNullHolder)) { VarSetter noMoreNulls(context.mutableNullsPruned(), true); if (nonNullHolder.get()->hasSelections()) { - // No need fix finalSelection here, LazyVector already loaded due to - // removeSureNulls method evalAll(*nonNullHolder.get(), context, result); } auto rawNonNulls = nonNullHolder.get()->asRange().bits(); @@ -1000,14 +1024,6 @@ void Expr::evalAll( bool defaultNulls = vectorFunction_->isDefaultNullBehavior(); inputValues_.resize(inputs_.size()); for (int32_t i = 0; i < inputs_.size(); ++i) { - // Fix finalSelection at "rows" if missingRows is a strict subset. - // "rows" may be used to evaluate exprs outside of current expr node. 
- bool updateFinalSelection = context.isFinalSelection() && - (remainingRows->countSelected() < rows.countSelected()); - VarSetter isFinalSelection( - context.mutableIsFinalSelection(), false, updateFinalSelection); - VarSetter finalSelection( - context.mutableFinalSelection(), &rows, updateFinalSelection); inputs_[i]->eval(*remainingRows, context, inputValues_[i]); tryPeelArgs = tryPeelArgs && isPeelable(inputValues_[i]->encoding()); if (defaultNulls && inputValues_[i]->mayHaveNulls()) { @@ -1283,6 +1299,11 @@ ExprSet::ExprSet( : execCtx_(execCtx) { exprs_ = compileExpressions( std::move(sources), execCtx, this, enableConstantFolding); + std::set allFields; + for (auto& expr : exprs_) { + // Find the fields referenced by multiple expressions + findMultiRefFields(allFields, multiRefFields_, expr->distinctFields()); + } } namespace { @@ -1357,6 +1378,18 @@ void ExprSet::eval( if (initialize) { clearSharedSubexprs(); } + + // Make sure LazyVectors, referenced by multiple expressions, are loaded + // for all the "rows". + // + // Consider projection with 2 expressions: f(a) AND g(b), h(b) + // If b is a LazyVector and f(a) AND g(b) expression is evaluated first, it + // will load b only for rows where f(a) is true. However, h(b) projection + // needs all rows for "b". + for (const auto& field : multiRefFields_) { + context->ensureFieldLoaded(field->index(*context), rows); + } + for (int32_t i = begin; i < end; ++i) { exprs_[i]->eval(rows, *context, (*result)[i]); } @@ -1373,6 +1406,7 @@ void ExprSet::clear() { for (auto* memo : memoizingExprs_) { memo->clearMemo(); } + multiRefFields_.clear(); } void ExprSetSimplified::eval( diff --git a/velox/expression/Expr.h b/velox/expression/Expr.h index a5c26aae1ba68..9e1683652cddd 100644 --- a/velox/expression/Expr.h +++ b/velox/expression/Expr.h @@ -331,6 +331,10 @@ class Expr { // parent Expr. std::vector distinctFields_; + // Fields referenced by multiple inputs, which is subset of distinctFields_. 
+ // Used to decide which lazy vectors to pre-load at the current expr. + std::set multiRefFields_; + // True if a null in any of 'distinctFields_' causes 'this' to be // null for the row. bool propagatesNulls_ = false; @@ -438,6 +442,8 @@ class ExprSet { std::vector> exprs_; + std::set multiRefFields_; + // Distinct Exprs reachable from 'exprs_' for which reset() needs to // be called at the start of eval(). std::vector> toReset_;