Skip to content

Commit

Permalink
Merge pull request apache#25 from bzhang02/YSPARK-1279
Browse files Browse the repository at this point in the history
[YSPARK-1279] Add config example for pyspark
  • Loading branch information
Sanket Chintapalli authored and GitHub Enterprise committed May 18, 2020
2 parents 31facd2 + 9c0c08e commit d549adb
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 1 deletion.
8 changes: 8 additions & 0 deletions src/main/python/python_word_count.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,23 @@
import sys

from pyspark.sql import SparkSession
from pyspark.conf import SparkConf


if __name__ == "__main__":
if len(sys.argv) != 3:
print("Usage: python_word_count <input_file> <output_file>", file=sys.stderr)
sys.exit(-1)

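# These settings override the default Spark configuration for this application at runtime.
# Note: spark.driver.memory may not take effect when set here, because the driver JVM is
# usually already running (e.g. under spark-submit); prefer --driver-memory in that case.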
conf = SparkConf()\
.set('spark.executor.memory', '4g')\
.set('spark.executor.cores', '2')\
.set('spark.driver.memory', '8g')

spark = SparkSession\
.builder\
.config(conf=conf)\
.appName("Python Word Count")\
.getOrCreate()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ object ScalaWordCount {

// Get a logger on the Driver/AppMaster
val logger = SparkStarterUtil.logger

logger.info("Input : " + inputFilesUri)
logger.info("Output : " + outputFilesUri)

Expand Down

0 comments on commit d549adb

Please sign in to comment.