[SPARK-14056] Appends s3 specific configurations and spark.hadoop configurations to hive configuration
## What changes were proposed in this pull request?

Appends S3-specific configurations and spark.hadoop.* configurations to the Hive configuration.
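
For orientation, here is a minimal sketch of the copy behavior this patch routes into the Hive path. This is not the patch's code; it assumes Spark's usual convention that a `spark.hadoop.foo` key becomes `foo` in the Hadoop configuration, and the `spark.buffer.size` forwarding is visible verbatim in the SparkHadoopUtil diff below.

```scala
import org.apache.hadoop.conf.Configuration
import org.apache.spark.SparkConf

// Sketch: mirror spark.hadoop.* keys into a Hadoop Configuration with the
// prefix stripped, and forward spark.buffer.size as io.file.buffer.size.
def copySparkHadoopSettings(conf: SparkConf, hadoopConf: Configuration): Unit = {
  conf.getAll.foreach { case (key, value) =>
    if (key.startsWith("spark.hadoop.")) {
      hadoopConf.set(key.substring("spark.hadoop.".length), value)
    }
  }
  hadoopConf.set("io.file.buffer.size", conf.get("spark.buffer.size", "65536"))
}
```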

## How was this patch tested?

Tested by running a job on a cluster.

Author: Sital Kedia <[email protected]>

Closes #11876 from sitalkedia/hiveConf.
Sital Kedia authored and srowen committed Apr 3, 2016
1 parent 03d130f commit 1cf7018
Showing 2 changed files with 15 additions and 8 deletions.
19 changes: 13 additions & 6 deletions core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
@@ -74,13 +74,12 @@ class SparkHadoopUtil extends Logging {
     }
   }
 
-  /**
-   * Return an appropriate (subclass) of Configuration. Creating config can initializes some Hadoop
-   * subsystems.
-   */
-  def newConfiguration(conf: SparkConf): Configuration = {
-    val hadoopConf = new Configuration()
-
+  /**
+   * Appends S3-specific, spark.hadoop.*, and spark.buffer.size configurations to a Hadoop
+   * configuration.
+   */
+  def appendS3AndSparkHadoopConfigurations(conf: SparkConf, hadoopConf: Configuration): Unit = {
     // Note: this null check is around more than just access to the "conf" object to maintain
     // the behavior of the old implementation of this code, for backwards compatibility.
     if (conf != null) {
@@ -106,7 +105,15 @@
       val bufferSize = conf.get("spark.buffer.size", "65536")
       hadoopConf.set("io.file.buffer.size", bufferSize)
     }
+  }
+
+  /**
+   * Return an appropriate (subclass) of Configuration. Creating config can initializes some Hadoop
+   * subsystems.
+   */
+  def newConfiguration(conf: SparkConf): Configuration = {
+    val hadoopConf = new Configuration()
+    appendS3AndSparkHadoopConfigurations(conf, hadoopConf)
+    hadoopConf
   }
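
With the refactor in place, the helper can be exercised on its own. A hedged usage sketch follows; the configuration keys and values are illustrative, not from the patch.

```scala
import org.apache.hadoop.conf.Configuration
import org.apache.spark.SparkConf
import org.apache.spark.deploy.SparkHadoopUtil

object AppendConfDemo extends App {
  // Illustrative settings: any spark.hadoop.* entry should surface in the
  // Hadoop configuration with the prefix stripped.
  val sparkConf = new SparkConf()
    .set("spark.hadoop.fs.s3n.awsAccessKeyId", "<access-key>")
    .set("spark.buffer.size", "131072")

  val hadoopConf = new Configuration()
  SparkHadoopUtil.get.appendS3AndSparkHadoopConfigurations(sparkConf, hadoopConf)

  assert(hadoopConf.get("fs.s3n.awsAccessKeyId") == "<access-key>")
  assert(hadoopConf.get("io.file.buffer.size") == "131072")
}
```
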
4 changes: 2 additions & 2 deletions sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala
@@ -34,6 +34,7 @@ import org.apache.hadoop.io.Writable
 import org.apache.hadoop.mapred.{FileInputFormat, InputFormat, JobConf}
 
 import org.apache.spark.broadcast.Broadcast
+import org.apache.spark.deploy.SparkHadoopUtil
 import org.apache.spark.internal.Logging
 import org.apache.spark.rdd.{EmptyRDD, HadoopRDD, RDD, UnionRDD}
 import org.apache.spark.sql.catalyst.InternalRow
@@ -74,8 +75,7 @@ class HadoopTableReader(
     math.max(sc.hiveconf.getInt("mapred.map.tasks", 1), sc.sparkContext.defaultMinPartitions)
   }
 
-  // TODO: set aws s3 credentials.
-
+  SparkHadoopUtil.get.appendS3AndSparkHadoopConfigurations(sc.sparkContext.conf, hiveExtraConf)
   private val _broadcastedHiveConf =
     sc.sparkContext.broadcast(new SerializableConfiguration(hiveExtraConf))
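
The user-visible effect of wiring this into HadoopTableReader, sketched end to end; the table and credential values are hypothetical, and HiveContext was the current API at the time of this commit.

```scala
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.hive.HiveContext

// Hypothetical job: S3 credentials supplied as spark.hadoop.* settings now
// reach the Hive-side Hadoop configuration that HadoopTableReader broadcasts,
// so executors can read a Hive table whose data lives in S3.
object S3HiveReadDemo extends App {
  val sc = new SparkContext(new SparkConf()
    .setAppName("s3-hive-read")
    .set("spark.hadoop.fs.s3n.awsAccessKeyId", "<access-key>")
    .set("spark.hadoop.fs.s3n.awsSecretAccessKey", "<secret-key>"))
  val hive = new HiveContext(sc)
  hive.sql("SELECT COUNT(*) FROM my_s3_backed_table").show()
}
```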
