[SPARK-14056] Appends s3 specific configurations and spark.hadoop configurations to hive configuration
## What changes were proposed in this pull request?

Appends S3-specific configurations and spark.hadoop.* configurations to the Hive configuration.
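
For orientation, here is a minimal sketch of the copy behavior this patch routes into the Hive path. This is not the patch's code; it assumes Spark's usual convention that a `spark.hadoop.foo` key becomes `foo` in the Hadoop configuration, and the `spark.buffer.size` forwarding is visible verbatim in the SparkHadoopUtil diff below.

```scala
import org.apache.hadoop.conf.Configuration
import org.apache.spark.SparkConf

// Sketch: mirror spark.hadoop.* keys into a Hadoop Configuration with the
// prefix stripped, and forward spark.buffer.size as io.file.buffer.size.
def copySparkHadoopSettings(conf: SparkConf, hadoopConf: Configuration): Unit = {
  conf.getAll.foreach { case (key, value) =>
    if (key.startsWith("spark.hadoop.")) {
      hadoopConf.set(key.substring("spark.hadoop.".length), value)
    }
  }
  hadoopConf.set("io.file.buffer.size", conf.get("spark.buffer.size", "65536"))
}
```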

## How was this patch tested?

Tested by running a job on a cluster.

Author: Sital Kedia <[email protected]>

Closes #11876 from sitalkedia/hiveConf.
Sital Kedia authored and srowen committed Apr 3, 2016
1 parent 03d130f commit 1cf7018
Showing 2 changed files with 15 additions and 8 deletions.
19 changes: 13 additions & 6 deletions core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
@@ -74,13 +74,12 @@ class SparkHadoopUtil extends Logging {
     }
   }
 
-  /**
-   * Return an appropriate (subclass) of Configuration. Creating config can initializes some Hadoop
-   * subsystems.
-   */
-  def newConfiguration(conf: SparkConf): Configuration = {
-    val hadoopConf = new Configuration()
-
+  /**
+   * Appends S3-specific, spark.hadoop.*, and spark.buffer.size configurations to a Hadoop
+   * configuration.
+   */
+  def appendS3AndSparkHadoopConfigurations(conf: SparkConf, hadoopConf: Configuration): Unit = {
     // Note: this null check is around more than just access to the "conf" object to maintain
     // the behavior of the old implementation of this code, for backwards compatibility.
     if (conf != null) {
@@ -106,7 +105,15 @@
       val bufferSize = conf.get("spark.buffer.size", "65536")
       hadoopConf.set("io.file.buffer.size", bufferSize)
     }
+  }
+
+  /**
+   * Return an appropriate (subclass) of Configuration. Creating config can initializes some Hadoop
+   * subsystems.
+   */
+  def newConfiguration(conf: SparkConf): Configuration = {
+    val hadoopConf = new Configuration()
+    appendS3AndSparkHadoopConfigurations(conf, hadoopConf)
+    hadoopConf
   }
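
With the refactor in place, the helper can be exercised on its own. A hedged usage sketch follows; the configuration keys and values are illustrative, not from the patch.

```scala
import org.apache.hadoop.conf.Configuration
import org.apache.spark.SparkConf
import org.apache.spark.deploy.SparkHadoopUtil

object AppendConfDemo extends App {
  // Illustrative settings: any spark.hadoop.* entry should surface in the
  // Hadoop configuration with the prefix stripped.
  val sparkConf = new SparkConf()
    .set("spark.hadoop.fs.s3n.awsAccessKeyId", "<access-key>")
    .set("spark.buffer.size", "131072")

  val hadoopConf = new Configuration()
  SparkHadoopUtil.get.appendS3AndSparkHadoopConfigurations(sparkConf, hadoopConf)

  assert(hadoopConf.get("fs.s3n.awsAccessKeyId") == "<access-key>")
  assert(hadoopConf.get("io.file.buffer.size") == "131072")
}
```
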
4 changes: 2 additions & 2 deletions sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala
@@ -34,6 +34,7 @@ import org.apache.hadoop.io.Writable
 import org.apache.hadoop.mapred.{FileInputFormat, InputFormat, JobConf}
 
 import org.apache.spark.broadcast.Broadcast
+import org.apache.spark.deploy.SparkHadoopUtil
 import org.apache.spark.internal.Logging
 import org.apache.spark.rdd.{EmptyRDD, HadoopRDD, RDD, UnionRDD}
 import org.apache.spark.sql.catalyst.InternalRow
@@ -74,8 +75,7 @@ class HadoopTableReader(
     math.max(sc.hiveconf.getInt("mapred.map.tasks", 1), sc.sparkContext.defaultMinPartitions)
   }
 
-  // TODO: set aws s3 credentials.
-
+  SparkHadoopUtil.get.appendS3AndSparkHadoopConfigurations(sc.sparkContext.conf, hiveExtraConf)
   private val _broadcastedHiveConf =
     sc.sparkContext.broadcast(new SerializableConfiguration(hiveExtraConf))
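
The user-visible effect of wiring this into HadoopTableReader, sketched end to end; the table and credential values are hypothetical, and HiveContext was the current API at the time of this commit.

```scala
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.hive.HiveContext

// Hypothetical job: S3 credentials supplied as spark.hadoop.* settings now
// reach the Hive-side Hadoop configuration that HadoopTableReader broadcasts,
// so executors can read a Hive table whose data lives in S3.
object S3HiveReadDemo extends App {
  val sc = new SparkContext(new SparkConf()
    .setAppName("s3-hive-read")
    .set("spark.hadoop.fs.s3n.awsAccessKeyId", "<access-key>")
    .set("spark.hadoop.fs.s3n.awsSecretAccessKey", "<secret-key>"))
  val hive = new HiveContext(sc)
  hive.sql("SELECT COUNT(*) FROM my_s3_backed_table").show()
}
```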
