diff --git a/core/benchmarks/MapStatusesSerDeserBenchmark-jdk11-results.txt b/core/benchmarks/MapStatusesSerDeserBenchmark-jdk11-results.txt index db23cf5c12ea7..4b16bd445f39d 100644 --- a/core/benchmarks/MapStatusesSerDeserBenchmark-jdk11-results.txt +++ b/core/benchmarks/MapStatusesSerDeserBenchmark-jdk11-results.txt @@ -1,64 +1,64 @@ -OpenJDK 64-Bit Server VM 11.0.4+11-post-Ubuntu-1ubuntu218.04.3 on Linux 4.15.0-1044-aws +OpenJDK 64-Bit Server VM 11.0.8+10-post-Ubuntu-0ubuntu118.04.1 on Linux 4.15.0-1044-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz 200000 MapOutputs, 10 blocks w/ broadcast: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Serialization 170 178 9 1.2 849.7 1.0X -Deserialization 530 535 9 0.4 2651.1 0.3X +------------------------------------------------------------------------------------------------------------------------- +Serialization 175 183 12 1.1 874.1 1.0X +Deserialization 458 462 6 0.4 2288.6 0.4X -Compressed Serialized MapStatus sizes: 411 bytes +Compressed Serialized MapStatus sizes: 410 bytes Compressed Serialized Broadcast MapStatus sizes: 2 MB -OpenJDK 64-Bit Server VM 11.0.4+11-post-Ubuntu-1ubuntu218.04.3 on Linux 4.15.0-1044-aws +OpenJDK 64-Bit Server VM 11.0.8+10-post-Ubuntu-0ubuntu118.04.1 on Linux 4.15.0-1044-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz 200000 MapOutputs, 10 blocks w/o broadcast: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Serialization 157 165 7 1.3 785.4 1.0X -Deserialization 495 588 79 0.4 2476.7 0.3X +-------------------------------------------------------------------------------------------------------------------------- +Serialization 160 171 8 1.2 801.1 1.0X +Deserialization 453 484 38 0.4 2263.4 0.4X Compressed Serialized MapStatus sizes: 2 MB Compressed Serialized Broadcast MapStatus sizes: 0 bytes -OpenJDK 64-Bit Server VM 11.0.4+11-post-Ubuntu-1ubuntu218.04.3 on Linux 4.15.0-1044-aws +OpenJDK 64-Bit Server VM 11.0.8+10-post-Ubuntu-0ubuntu118.04.1 on Linux 4.15.0-1044-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz 200000 MapOutputs, 100 blocks w/ broadcast: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Serialization 344 351 4 0.6 1720.4 1.0X -Deserialization 527 579 99 0.4 2635.9 0.7X +-------------------------------------------------------------------------------------------------------------------------- +Serialization 343 346 2 0.6 1717.1 1.0X +Deserialization 492 540 59 0.4 2459.6 0.7X -Compressed Serialized MapStatus sizes: 427 bytes +Compressed Serialized MapStatus sizes: 426 bytes Compressed Serialized Broadcast MapStatus sizes: 13 MB -OpenJDK 64-Bit Server VM 11.0.4+11-post-Ubuntu-1ubuntu218.04.3 on Linux 4.15.0-1044-aws +OpenJDK 64-Bit Server VM 11.0.8+10-post-Ubuntu-0ubuntu118.04.1 on Linux 4.15.0-1044-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz 200000 MapOutputs, 100 blocks w/o broadcast: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Serialization 317 321 4 0.6 1583.8 1.0X -Deserialization 530 540 15 0.4 2648.3 0.6X +--------------------------------------------------------------------------------------------------------------------------- +Serialization 297 299 2 0.7 1486.2 1.0X +Deserialization 489 535 87 0.4 2446.5 0.6X Compressed Serialized MapStatus sizes: 13 MB Compressed Serialized Broadcast MapStatus sizes: 0 bytes -OpenJDK 64-Bit Server VM 11.0.4+11-post-Ubuntu-1ubuntu218.04.3 on Linux 4.15.0-1044-aws +OpenJDK 64-Bit Server VM 11.0.8+10-post-Ubuntu-0ubuntu118.04.1 on Linux 4.15.0-1044-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz 200000 MapOutputs, 1000 blocks w/ broadcast: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Serialization 1738 1849 156 0.1 8692.0 1.0X -Deserialization 946 977 33 0.2 4730.2 1.8X +--------------------------------------------------------------------------------------------------------------------------- +Serialization 1641 1819 252 0.1 8204.1 1.0X +Deserialization 844 882 37 0.2 4219.7 1.9X -Compressed Serialized MapStatus sizes: 556 bytes +Compressed Serialized MapStatus sizes: 553 bytes Compressed Serialized Broadcast MapStatus sizes: 121 MB -OpenJDK 64-Bit Server VM 11.0.4+11-post-Ubuntu-1ubuntu218.04.3 on Linux 4.15.0-1044-aws +OpenJDK 64-Bit Server VM 11.0.8+10-post-Ubuntu-0ubuntu118.04.1 on Linux 4.15.0-1044-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz 200000 MapOutputs, 1000 blocks w/o broadcast: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Serialization 1379 1432 76 0.1 6892.6 1.0X -Deserialization 929 941 19 0.2 4645.5 1.5X +---------------------------------------------------------------------------------------------------------------------------- +Serialization 1360 1412 73 0.1 6799.3 1.0X +Deserialization 850 859 13 0.2 4249.9 1.6X Compressed Serialized MapStatus sizes: 121 MB Compressed Serialized Broadcast MapStatus sizes: 0 bytes diff --git a/core/benchmarks/MapStatusesSerDeserBenchmark-results.txt b/core/benchmarks/MapStatusesSerDeserBenchmark-results.txt index 053f4bf771923..996b79b896dba 100644 --- a/core/benchmarks/MapStatusesSerDeserBenchmark-results.txt +++ b/core/benchmarks/MapStatusesSerDeserBenchmark-results.txt @@ -1,64 +1,64 @@ -OpenJDK 64-Bit Server VM 1.8.0_222-8u222-b10-1ubuntu1~18.04.1-b10 on Linux 4.15.0-1044-aws +OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1044-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz 200000 MapOutputs, 10 blocks w/ broadcast: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Serialization 178 187 15 1.1 887.5 1.0X -Deserialization 530 558 32 0.4 2647.5 0.3X +------------------------------------------------------------------------------------------------------------------------- +Serialization 193 202 15 1.0 966.2 1.0X +Deserialization 477 521 44 0.4 2387.5 0.4X -Compressed Serialized MapStatus sizes: 411 bytes +Compressed Serialized MapStatus sizes: 410 bytes Compressed Serialized Broadcast MapStatus sizes: 2 MB -OpenJDK 64-Bit Server VM 1.8.0_222-8u222-b10-1ubuntu1~18.04.1-b10 on Linux 4.15.0-1044-aws +OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1044-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz 200000 MapOutputs, 10 blocks w/o broadcast: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Serialization 167 175 7 1.2 835.7 1.0X -Deserialization 523 537 22 0.4 2616.2 0.3X +-------------------------------------------------------------------------------------------------------------------------- +Serialization 181 187 11 1.1 907.1 1.0X +Deserialization 477 492 27 0.4 2383.5 0.4X Compressed Serialized MapStatus sizes: 2 MB Compressed Serialized Broadcast MapStatus sizes: 0 bytes -OpenJDK 64-Bit Server VM 1.8.0_222-8u222-b10-1ubuntu1~18.04.1-b10 on Linux 4.15.0-1044-aws +OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1044-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz 200000 MapOutputs, 100 blocks w/ broadcast: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Serialization 351 416 147 0.6 1754.4 1.0X -Deserialization 546 551 8 0.4 2727.6 0.6X +-------------------------------------------------------------------------------------------------------------------------- +Serialization 359 436 162 0.6 1797.5 1.0X +Deserialization 506 514 15 0.4 2530.8 0.7X -Compressed Serialized MapStatus sizes: 427 bytes +Compressed Serialized MapStatus sizes: 426 bytes Compressed Serialized Broadcast MapStatus sizes: 13 MB -OpenJDK 64-Bit Server VM 1.8.0_222-8u222-b10-1ubuntu1~18.04.1-b10 on Linux 4.15.0-1044-aws +OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1044-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz 200000 MapOutputs, 100 blocks w/o broadcast: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Serialization 320 321 1 0.6 1598.0 1.0X -Deserialization 542 549 7 0.4 2709.0 0.6X +--------------------------------------------------------------------------------------------------------------------------- +Serialization 322 323 0 0.6 1610.5 1.0X +Deserialization 508 513 5 0.4 2538.4 0.6X Compressed Serialized MapStatus sizes: 13 MB Compressed Serialized Broadcast MapStatus sizes: 0 bytes -OpenJDK 64-Bit Server VM 1.8.0_222-8u222-b10-1ubuntu1~18.04.1-b10 on Linux 4.15.0-1044-aws +OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1044-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz 200000 MapOutputs, 1000 blocks w/ broadcast: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Serialization 1671 1877 290 0.1 8357.3 1.0X -Deserialization 943 970 32 0.2 4715.8 1.8X +--------------------------------------------------------------------------------------------------------------------------- +Serialization 1740 1903 231 0.1 8700.0 1.0X +Deserialization 872 888 24 0.2 4360.9 2.0X -Compressed Serialized MapStatus sizes: 556 bytes +Compressed Serialized MapStatus sizes: 553 bytes Compressed Serialized Broadcast MapStatus sizes: 121 MB -OpenJDK 64-Bit Server VM 1.8.0_222-8u222-b10-1ubuntu1~18.04.1-b10 on Linux 4.15.0-1044-aws +OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1044-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz 200000 MapOutputs, 1000 blocks w/o broadcast: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Serialization 1373 1436 89 0.1 6865.0 1.0X -Deserialization 940 970 37 0.2 4699.1 1.5X +---------------------------------------------------------------------------------------------------------------------------- +Serialization 1461 1469 11 0.1 7306.1 1.0X +Deserialization 871 889 22 0.2 4353.9 1.7X Compressed Serialized MapStatus sizes: 121 MB Compressed Serialized Broadcast MapStatus sizes: 0 bytes diff --git a/core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala b/core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala index 7f8893ff3b9d8..6fbbebfcaccda 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala @@ -194,7 +194,7 @@ private[spark] class HighlyCompressedMapStatus private ( override def writeExternal(out: ObjectOutput): Unit = Utils.tryOrIOException { loc.writeExternal(out) - emptyBlocks.writeExternal(out) + emptyBlocks.serialize(out) out.writeLong(avgSize) out.writeInt(hugeBlockSizes.size) hugeBlockSizes.foreach { kv => @@ -207,7 +207,7 @@ private[spark] class HighlyCompressedMapStatus private ( override def readExternal(in: ObjectInput): Unit = Utils.tryOrIOException { loc = BlockManagerId(in) emptyBlocks = new RoaringBitmap() - emptyBlocks.readExternal(in) + emptyBlocks.deserialize(in) avgSize = in.readLong() val count = in.readInt() val hugeBlockSizesImpl = mutable.Map.empty[Int, Byte] diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-1.2 b/dev/deps/spark-deps-hadoop-2.7-hive-1.2 index 82d5a06f906d8..237fde70bcb65 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-1.2 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-1.2 @@ -1,7 +1,7 @@ JLargeArrays/1.5//JLargeArrays-1.5.jar JTransforms/3.1//JTransforms-3.1.jar JavaEWAH/0.3.2//JavaEWAH-0.3.2.jar -RoaringBitmap/0.7.45//RoaringBitmap-0.7.45.jar +RoaringBitmap/0.9.0//RoaringBitmap-0.9.0.jar ST4/4.0.4//ST4-4.0.4.jar activation/1.1.1//activation-1.1.1.jar aircompressor/0.10//aircompressor-0.10.jar @@ -184,7 +184,7 @@ scala-parser-combinators_2.12/1.1.2//scala-parser-combinators_2.12-1.1.2.jar scala-reflect/2.12.10//scala-reflect-2.12.10.jar scala-xml_2.12/1.2.0//scala-xml_2.12-1.2.0.jar shapeless_2.12/2.3.3//shapeless_2.12-2.3.3.jar -shims/0.7.45//shims-0.7.45.jar +shims/0.9.0//shims-0.9.0.jar slf4j-api/1.7.30//slf4j-api-1.7.30.jar slf4j-log4j12/1.7.30//slf4j-log4j12-1.7.30.jar snakeyaml/1.24//snakeyaml-1.24.jar diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index 17c787e42fdba..ee7cce4c40a81 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -1,7 +1,7 @@ HikariCP/2.5.1//HikariCP-2.5.1.jar JLargeArrays/1.5//JLargeArrays-1.5.jar JTransforms/3.1//JTransforms-3.1.jar -RoaringBitmap/0.7.45//RoaringBitmap-0.7.45.jar +RoaringBitmap/0.9.0//RoaringBitmap-0.9.0.jar ST4/4.0.4//ST4-4.0.4.jar activation/1.1.1//activation-1.1.1.jar aircompressor/0.10//aircompressor-0.10.jar @@ -198,7 +198,7 @@ scala-parser-combinators_2.12/1.1.2//scala-parser-combinators_2.12-1.1.2.jar scala-reflect/2.12.10//scala-reflect-2.12.10.jar scala-xml_2.12/1.2.0//scala-xml_2.12-1.2.0.jar shapeless_2.12/2.3.3//shapeless_2.12-2.3.3.jar -shims/0.7.45//shims-0.7.45.jar +shims/0.9.0//shims-0.9.0.jar slf4j-api/1.7.30//slf4j-api-1.7.30.jar slf4j-log4j12/1.7.30//slf4j-log4j12-1.7.30.jar snakeyaml/1.24//snakeyaml-1.24.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index b5a10b5dba378..b3d1aca40e5be 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -1,7 +1,7 @@ HikariCP/2.5.1//HikariCP-2.5.1.jar JLargeArrays/1.5//JLargeArrays-1.5.jar JTransforms/3.1//JTransforms-3.1.jar -RoaringBitmap/0.7.45//RoaringBitmap-0.7.45.jar +RoaringBitmap/0.9.0//RoaringBitmap-0.9.0.jar ST4/4.0.4//ST4-4.0.4.jar accessors-smart/1.2//accessors-smart-1.2.jar activation/1.1.1//activation-1.1.1.jar @@ -213,7 +213,7 @@ scala-parser-combinators_2.12/1.1.2//scala-parser-combinators_2.12-1.1.2.jar scala-reflect/2.12.10//scala-reflect-2.12.10.jar scala-xml_2.12/1.2.0//scala-xml_2.12-1.2.0.jar shapeless_2.12/2.3.3//shapeless_2.12-2.3.3.jar -shims/0.7.45//shims-0.7.45.jar +shims/0.9.0//shims-0.9.0.jar slf4j-api/1.7.30//slf4j-api-1.7.30.jar slf4j-log4j12/1.7.30//slf4j-log4j12-1.7.30.jar snakeyaml/1.24//snakeyaml-1.24.jar diff --git a/pom.xml b/pom.xml index 45946dde01a7a..740da2b55114c 100644 --- a/pom.xml +++ b/pom.xml @@ -694,7 +694,7 @@ org.roaringbitmap RoaringBitmap - 0.7.45 + 0.9.0 commons-net