diff --git a/velox/dwio/common/ColumnVisitors.h b/velox/dwio/common/ColumnVisitors.h index 8dd63e2f45de..116e325d3558 100644 --- a/velox/dwio/common/ColumnVisitors.h +++ b/velox/dwio/common/ColumnVisitors.h @@ -1347,8 +1347,8 @@ class ExtractStringDictionaryToGenericHook { hook_->addValue(rowIndex, &view); } else { VELOX_DCHECK(state_.inDictionary); - auto view = folly::StringPiece( - reinterpret_cast(state_.dictionary.values)[value]); + auto view = folly::StringPiece(reinterpret_cast( + state_.dictionary2.values)[value - dictionarySize()]); hook_->addValue(rowIndex, &view); } } diff --git a/velox/dwio/common/IntDecoder.cpp b/velox/dwio/common/IntDecoder.cpp index c450a01d27bc..eab0813fc921 100644 --- a/velox/dwio/common/IntDecoder.cpp +++ b/velox/dwio/common/IntDecoder.cpp @@ -2570,6 +2570,11 @@ void IntDecoder::decodeBitsLE( const char* bufferEnd, T* FOLLY_NONNULL result) { uint64_t mask = bits::lowMask(bitWidth); + if (bitWidth == 0) { + // A column of dictionary indices can be 0 bits wide if all indices are 0. + memset(result, 0, rows.size() * sizeof(T)); + return; + } // We subtract rowBias * bitWidth bits from the starting position. bitOffset -= rowBias * bitWidth; if (bitOffset < 0) { diff --git a/velox/dwio/common/tests/DecodeBitsTest.cpp b/velox/dwio/common/tests/DecodeBitsTest.cpp index dc7a87b0acf9..ced5cc45fbc3 100644 --- a/velox/dwio/common/tests/DecodeBitsTest.cpp +++ b/velox/dwio/common/tests/DecodeBitsTest.cpp @@ -126,7 +126,7 @@ class DecodeBitsTest : public testing::Test { }; TEST_F(DecodeBitsTest, allWidths) { - for (auto width = 1; width < bitPackedData_.size(); ++width) { + for (auto width = 0; width < bitPackedData_.size(); ++width) { testDecodeRows(width, allRows_); testDecodeRows(width, allRows_); testDecodeRows(width, oddRows_); diff --git a/velox/dwio/common/tests/E2EFilterTestBase.cpp b/velox/dwio/common/tests/E2EFilterTestBase.cpp index 6d9f5173525e..7b0754f3e3f7 100644 --- a/velox/dwio/common/tests/E2EFilterTestBase.cpp +++ b/velox/dwio/common/tests/E2EFilterTestBase.cpp @@ -110,17 +110,20 @@ void E2EFilterTestBase::makeStringDistribution( continue; } std::string value; - if (counter % 100 < cardinality) { + if (counter % 2251 < 100 || cardinality == 1) { + // Run of 100 ascending values every 2251 rows. If cardinality is 1, the + // value is repeated here. value = fmt::format("s{}", counter % cardinality); strings->set(row, StringView(value)); } else if (counter % 100 > 90 && row > 0) { - strings->copy(strings, row - 1, row, 1); + // Sequence of 10 identical values every 100 rows. + strings->copy(strings, row, row - 1, 1); } else if (addOneOffs && counter % 234 == 0) { value = fmt::format( "s{}", folly::Random::rand32(filterGenerator->rng()) % (111 * cardinality)); - + strings->set(row, StringView(value)); } else { value = fmt::format( "s{}", folly::Random::rand32(filterGenerator->rng()) % cardinality); diff --git a/velox/dwio/parquet/tests/reader/E2EFilterTest.cpp b/velox/dwio/parquet/tests/reader/E2EFilterTest.cpp index f591c710b77c..22890c1ad532 100644 --- a/velox/dwio/parquet/tests/reader/E2EFilterTest.cpp +++ b/velox/dwio/parquet/tests/reader/E2EFilterTest.cpp @@ -345,10 +345,12 @@ TEST_F(E2EFilterTest, stringDirect) { TEST_F(E2EFilterTest, stringDictionary) { testWithTypes( "string_val:string," - "string_val_2:string", + "string_val_2:string," + "string_const: string", [&]() { makeStringDistribution(Subfield("string_val"), 100, true, false); makeStringDistribution(Subfield("string_val_2"), 170, false, true); + makeStringDistribution(Subfield("string_const"), 1, true, false); }, false, {"string_val", "string_val_2"},