From 25a53f1a63fd3c4059d4ff9e818367a4073e381f Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Sat, 27 Mar 2021 07:56:29 -0700 Subject: [PATCH] [SPARK-34542][BUILD] Upgrade Parquet to 1.12.0 What changes were proposed in this pull request? Parquet 1.12.0 New Feature - PARQUET-41 - Add bloom filters to parquet statistics - PARQUET-1373 - Encryption key management tools - PARQUET-1396 - Example of using EncryptionPropertiesFactory and DecryptionPropertiesFactory - PARQUET-1622 - Add BYTE_STREAM_SPLIT encoding - PARQUET-1784 - Column-wise configuration - PARQUET-1817 - Crypto Properties Factory - PARQUET-1854 - Properties-Driven Interface to Parquet Encryption Parquet 1.12.0 release notes: https://github.com/apache/parquet-mr/blob/apache-parquet-1.12.0/CHANGES.md Why are the changes needed? - Bloom filters to improve filter performance - ZSTD enhancement Does this PR introduce any user-facing change? No. How was this patch tested? Existing unit test. Closes #31649 from wangyum/SPARK-34542. Lead-authored-by: Yuming Wang Co-authored-by: Yuming Wang Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 12 ++++++------ dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 12 ++++++------ pom.xml | 4 ++-- .../org/apache/spark/sql/hive/StatisticsSuite.scala | 2 +- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index 0544f5f2c3310..976de7a95e210 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -201,12 +201,12 @@ orc-shims/1.6.11//orc-shims-1.6.11.jar oro/2.0.8//oro-2.0.8.jar osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar paranamer/2.8//paranamer-2.8.jar -parquet-column/1.10.1//parquet-column-1.10.1.jar -parquet-common/1.10.1//parquet-common-1.10.1.jar -parquet-encoding/1.10.1//parquet-encoding-1.10.1.jar -parquet-format/2.4.0//parquet-format-2.4.0.jar -parquet-hadoop/1.10.1//parquet-hadoop-1.10.1.jar -parquet-jackson/1.10.1//parquet-jackson-1.10.1.jar +parquet-column/1.12.0//parquet-column-1.12.0.jar +parquet-common/1.12.0//parquet-common-1.12.0.jar 
+parquet-encoding/1.12.0//parquet-encoding-1.12.0.jar +parquet-format-structures/1.12.0//parquet-format-structures-1.12.0.jar +parquet-hadoop/1.12.0//parquet-hadoop-1.12.0.jar +parquet-jackson/1.12.0//parquet-jackson-1.12.0.jar protobuf-java/2.5.0//protobuf-java-2.5.0.jar py4j/0.10.9//py4j-0.10.9.jar pyrolite/4.30//pyrolite-4.30.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index 5258904c9c38a..a81de4b21a795 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -214,12 +214,12 @@ orc-shims/1.6.11//orc-shims-1.6.11.jar oro/2.0.8//oro-2.0.8.jar osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar paranamer/2.8//paranamer-2.8.jar -parquet-column/1.10.1//parquet-column-1.10.1.jar -parquet-common/1.10.1//parquet-common-1.10.1.jar -parquet-encoding/1.10.1//parquet-encoding-1.10.1.jar -parquet-format/2.4.0//parquet-format-2.4.0.jar -parquet-hadoop/1.10.1//parquet-hadoop-1.10.1.jar -parquet-jackson/1.10.1//parquet-jackson-1.10.1.jar +parquet-column/1.12.0//parquet-column-1.12.0.jar +parquet-common/1.12.0//parquet-common-1.12.0.jar +parquet-encoding/1.12.0//parquet-encoding-1.12.0.jar +parquet-format-structures/1.12.0//parquet-format-structures-1.12.0.jar +parquet-hadoop/1.12.0//parquet-hadoop-1.12.0.jar +parquet-jackson/1.12.0//parquet-jackson-1.12.0.jar protobuf-java/2.5.0//protobuf-java-2.5.0.jar py4j/0.10.9//py4j-0.10.9.jar pyrolite/4.30//pyrolite-4.30.jar diff --git a/pom.xml b/pom.xml index 1b1cdfc605efd..4189658a59b47 100644 --- a/pom.xml +++ b/pom.xml @@ -136,7 +136,7 @@ 2.6.0 10.12.1.1 - 1.10.1 + 1.12.0 1.6.11 9.4.36.v20210114 4.0.3 @@ -2131,7 +2131,7 @@ ${hive.group} hive-service-rpc - + org.apache.parquet parquet-hadoop-bundle diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala index ab831111be513..c1277e46082ba 100644 --- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala @@ -1529,7 +1529,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto Seq(tbl, ext_tbl).foreach { tblName => sql(s"INSERT INTO $tblName VALUES (1, 'a', '2019-12-13')") - val expectedSize = 601 + val expectedSize = 657 // analyze table sql(s"ANALYZE TABLE $tblName COMPUTE STATISTICS NOSCAN") var tableStats = getTableStats(tblName)