diff --git a/velox/dwio/common/DirectDecoder.h b/velox/dwio/common/DirectDecoder.h
index 4cd9396d0936..fd8d62fecda3 100644
--- a/velox/dwio/common/DirectDecoder.h
+++ b/velox/dwio/common/DirectDecoder.h
@@ -92,7 +92,24 @@ class DirectDecoder : public IntDecoder<isSigned> {
       } else if constexpr (std::is_same_v<
                                typename Visitor::DataType,
                                int128_t>) {
-        toSkip = visitor.process(super::template readInt<int128_t>(), atEnd);
+        if (super::numBytes == 12 /* INT96 */) {
+          // readInt<int128_t>() packs an INT96 as days in the low 32 bits
+          // and nanoseconds in the bits above them.
+          const int128_t encoded = super::template readInt<int128_t>();
+          const int32_t days = encoded & ((1ULL << 32) - 1);
+          const uint64_t nanos = static_cast<uint64_t>(encoded >> 32);
+
+          const auto timestamp = Timestamp::fromDaysAndNanos(days, nanos);
+          // Copy the bytes instead of reinterpret_cast-ing: Timestamp is
+          // not an int128_t (strict aliasing) and a local Timestamp may
+          // not be aligned for int128_t.
+          static_assert(sizeof(Timestamp) == sizeof(int128_t));
+          int128_t value;
+          std::memcpy(&value, &timestamp, sizeof(value));
+          toSkip = visitor.process(value, atEnd);
+        } else {
+          toSkip = visitor.process(super::template readInt<int128_t>(), atEnd);
+        }
       } else {
         toSkip = visitor.process(super::template readInt<int64_t>(), atEnd);
       }
diff --git a/velox/dwio/common/IntDecoder.h b/velox/dwio/common/IntDecoder.h
index e5e37b429663..6aded93677aa 100644
--- a/velox/dwio/common/IntDecoder.h
+++ b/velox/dwio/common/IntDecoder.h
@@ -154,6 +154,9 @@ class IntDecoder {
   template <typename T>
   T readInt();
 
+  template <typename T>
+  T readInt96();
+
   template <typename T>
   T readVInt();
 
@@ -438,12 +441,41 @@ inline T IntDecoder<isSigned>::readInt() {
     return readLittleEndianFromBigEndian<T>();
   } else {
     if constexpr (std::is_same_v<T, int128_t>) {
-      VELOX_NYI();
+      if (numBytes == 12) {
+        // TODO: Do we need to handle the useVInts case?
+        return readInt96<T>();
+      } else {
+        VELOX_NYI();
+      }
     }
     return readLongLE();
   }
 }
 
+// Reads 12 bytes and packs them into a 128-bit value: the first 8 bytes
+// (little-endian) land in bits [32, 96), the last 4 bytes (little-endian)
+// in bits [0, 32). DirectDecoder treats the low 32 bits as days and the
+// upper part as nanoseconds when decoding INT96 timestamps.
+template <bool isSigned>
+template <typename T>
+inline T IntDecoder<isSigned>::readInt96() {
+  // First 8 bytes, least significant byte first.
+  uint64_t high = 0;
+  for (uint32_t shift = 0; shift < 64; shift += 8) {
+    high |= static_cast<uint64_t>(readByte() & BASE_256_MASK) << shift;
+  }
+
+  // Last 4 bytes. Accumulated as unsigned so that a set top bit is not
+  // sign-extended over 'high' when the two parts are OR-ed together.
+  uint32_t low = 0;
+  for (uint32_t shift = 0; shift < 32; shift += 8) {
+    low |= static_cast<uint32_t>(readByte() & BASE_256_MASK) << shift;
+  }
+
+  const int128_t result = high;
+  return (result << 32) | low;
+}
+
 template <bool isSigned>
 template <typename T>
 inline T IntDecoder<isSigned>::readVInt() {
diff --git a/velox/dwio/parquet/tests/examples/timestamp_dict_int96.parquet b/velox/dwio/parquet/tests/examples/timestamp_dict_int96.parquet
new file mode 100644
index 000000000000..661cb7a28522
Binary files /dev/null and b/velox/dwio/parquet/tests/examples/timestamp_dict_int96.parquet differ
diff --git a/velox/dwio/parquet/tests/examples/timestamp_plain_int96.parquet b/velox/dwio/parquet/tests/examples/timestamp_plain_int96.parquet
new file mode 100644
index 000000000000..f2aa666b7d71
Binary files /dev/null and b/velox/dwio/parquet/tests/examples/timestamp_plain_int96.parquet differ
diff --git a/velox/dwio/parquet/tests/reader/ParquetTableScanTest.cpp b/velox/dwio/parquet/tests/reader/ParquetTableScanTest.cpp
index 6bb72488c8e0..fd44918bb4e2 100644
--- a/velox/dwio/parquet/tests/reader/ParquetTableScanTest.cpp
+++ b/velox/dwio/parquet/tests/reader/ParquetTableScanTest.cpp
@@ -898,6 +898,34 @@ TEST_F(ParquetTableScanTest, timestampPrecisionMicrosecond) {
   assertEqualResults({expected}, result.second);
 }
 
+
+TEST_F(ParquetTableScanTest, timestampINT96) {
+  auto a = makeFlatVector<Timestamp>({Timestamp(1, 0), Timestamp(2, 0)});
+  auto expected = makeRowVector({"time"}, {a});
+  createDuckDbTable("expected", {expected});
+
+  auto vector = makeArrayVector<Timestamp>({{}});
+  loadData(
+      getExampleFilePath("timestamp_dict_int96.parquet"),
+      ROW({"time"}, {TIMESTAMP()}),
+      makeRowVector(
+          {"time"},
+          {
+              vector,
+          }));
+  assertSelect({"time"}, "SELECT time from expected");
+
+  loadData(
+      getExampleFilePath("timestamp_plain_int96.parquet"),
+      ROW({"time"}, {TIMESTAMP()}),
+      makeRowVector(
+          {"time"},
+          {
+              vector,
+          }));
+  assertSelect({"time"}, "SELECT time from expected");
+}
+
 int main(int argc, char** argv) {
   testing::InitGoogleTest(&argc, argv);
   folly::Init init{&argc, &argv, false};