diff --git a/src/main/python/python_word_count.py b/src/main/python/python_word_count.py index 0963c1589aa69..3aa1a2a24543c 100644 --- a/src/main/python/python_word_count.py +++ b/src/main/python/python_word_count.py @@ -3,6 +3,7 @@ import sys from pyspark.sql import SparkSession +from pyspark.conf import SparkConf if __name__ == "__main__": @@ -10,8 +11,15 @@ print("Usage: python_word_count ", file=sys.stderr) sys.exit(-1) + # These settings override Spark's default configuration for this application at runtime. NOTE: spark.driver.memory may have no effect when set here because the driver JVM may already be running; prefer --driver-memory on spark-submit. + conf = SparkConf()\ + .set('spark.executor.memory', '4g')\ + .set('spark.executor.cores', '2')\ + .set('spark.driver.memory', '8g') + spark = SparkSession\ .builder\ + .config(conf=conf)\ .appName("Python Word Count")\ .getOrCreate() diff --git a/src/main/scala/com/yahoo/spark/starter/ScalaWordCount.scala b/src/main/scala/com/yahoo/spark/starter/ScalaWordCount.scala index 45997a360c9df..9d44ef7133e5f 100644 --- a/src/main/scala/com/yahoo/spark/starter/ScalaWordCount.scala +++ b/src/main/scala/com/yahoo/spark/starter/ScalaWordCount.scala @@ -24,7 +24,7 @@ object ScalaWordCount { // Get a logger on the Driver/AppMaster val logger = SparkStarterUtil.logger - + logger.info("Input : " + inputFilesUri) logger.info("Output : " + outputFilesUri)