From f410f3abc44a62e1c75c99669a7f0310a3fb828d Mon Sep 17 00:00:00 2001 From: Zamil Majdy Date: Tue, 8 Aug 2023 13:40:33 +0200 Subject: [PATCH 1/4] Match ColumnVector memory-mode config default to OffHeapMemoryMode config value --- .../apache/spark/sql/internal/SQLConf.scala | 8 +++-- .../ConfigColumnVectorModeDefaultSuite.scala | 35 +++++++++++++++++++ 2 files changed, 41 insertions(+), 2 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/ConfigColumnVectorModeDefaultSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 3093f0c137815..f17490e19dc4c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -502,10 +502,14 @@ object SQLConf { val COLUMN_VECTOR_OFFHEAP_ENABLED = buildConf("spark.sql.columnVector.offheap.enabled") .internal() - .doc("When true, use OffHeapColumnVector in ColumnarBatch.") + .doc("When true, use OffHeapColumnVector in ColumnarBatch. " + + s"Defaults to $MEMORY_OFFHEAP_ENABLED && $MEMORY_OFFHEAP_SIZE > 0.") .version("2.3.0") .booleanConf - .createWithDefault(false) + .createWithDefaultFunction(() => { + val conf = org.apache.spark.SparkEnv.get.conf + conf.get(MEMORY_OFFHEAP_ENABLED) && conf.get(MEMORY_OFFHEAP_SIZE) > 0 + }) val PREFER_SORTMERGEJOIN = buildConf("spark.sql.join.preferSortMergeJoin") .internal() diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ConfigColumnVectorModeDefaultSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ConfigColumnVectorModeDefaultSuite.scala new file mode 100644 index 0000000000000..a1edbda39b593 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/ConfigColumnVectorModeDefaultSuite.scala @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import org.apache.spark.SparkConf +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SharedSparkSession +class ConfigColumnVectorModeDefaultSuite extends SharedSparkSession { + + override def sparkConf: SparkConf = { + super.sparkConf + .set("spark.memory.offHeap.size", (1024L * 1024L).toString) + .set("spark.memory.offHeap.enabled", "true") + } + + test("Off-heap column vector should be enabled by default when off-heap mode is used") { + assert(SQLConf.get.offHeapColumnVectorEnabled) + } +} + From b167c1d2a003b71931c0874377dfe5d1d1ce5a59 Mon Sep 17 00:00:00 2001 From: Zamil Majdy Date: Wed, 9 Aug 2023 10:22:48 +0200 Subject: [PATCH 2/4] Address comment --- .../scala/org/apache/spark/sql/internal/SQLConf.scala | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index f17490e19dc4c..17ab12576432d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -503,13 +503,9 @@ object SQLConf { buildConf("spark.sql.columnVector.offheap.enabled") .internal() .doc("When true, use OffHeapColumnVector in ColumnarBatch. " + - s"Defaults to $MEMORY_OFFHEAP_ENABLED && $MEMORY_OFFHEAP_SIZE > 0.") + s"Defaults to $MEMORY_OFFHEAP_ENABLED.") .version("2.3.0") - .booleanConf - .createWithDefaultFunction(() => { - val conf = org.apache.spark.SparkEnv.get.conf - conf.get(MEMORY_OFFHEAP_ENABLED) && conf.get(MEMORY_OFFHEAP_SIZE) > 0 - }) + .fallbackConf(MEMORY_OFFHEAP_ENABLED) val PREFER_SORTMERGEJOIN = buildConf("spark.sql.join.preferSortMergeJoin") .internal() From 7618da740da2904930ebb6325913f5918c32fe92 Mon Sep 17 00:00:00 2001 From: Zamil Majdy Date: Fri, 11 Aug 2023 10:14:46 +0200 Subject: [PATCH 3/4] Update ConfigColumnVectorModeDefaultSuite.scala --- .../apache/spark/sql/ConfigColumnVectorModeDefaultSuite.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ConfigColumnVectorModeDefaultSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ConfigColumnVectorModeDefaultSuite.scala index a1edbda39b593..add21d81af762 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ConfigColumnVectorModeDefaultSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ConfigColumnVectorModeDefaultSuite.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql import org.apache.spark.SparkConf import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession + class ConfigColumnVectorModeDefaultSuite extends SharedSparkSession { override def sparkConf: SparkConf = { From 0069d4a12640b0653549fb994a3c3bd5f132a54d Mon Sep 17 00:00:00 2001 From: Zamil Majdy Date: Tue, 15 Aug 2023 06:06:50 +0200 Subject: [PATCH 4/4] Delete ConfigColumnVectorModeDefaultSuite.scala --- .../ConfigColumnVectorModeDefaultSuite.scala | 36 ------------------- 1 file changed, 36 deletions(-) delete mode 100644 sql/core/src/test/scala/org/apache/spark/sql/ConfigColumnVectorModeDefaultSuite.scala diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ConfigColumnVectorModeDefaultSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ConfigColumnVectorModeDefaultSuite.scala deleted file mode 100644 index add21d81af762..0000000000000 --- a/sql/core/src/test/scala/org/apache/spark/sql/ConfigColumnVectorModeDefaultSuite.scala +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql - -import org.apache.spark.SparkConf -import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.test.SharedSparkSession - -class ConfigColumnVectorModeDefaultSuite extends SharedSparkSession { - - override def sparkConf: SparkConf = { - super.sparkConf - .set("spark.memory.offHeap.size", (1024L * 1024L).toString) - .set("spark.memory.offHeap.enabled", "true") - } - - test("Off-heap column vector should be enabled by default when off-heap mode is used") { - assert(SQLConf.get.offHeapColumnVectorEnabled) - } -} -