-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDockerfile
20 lines (15 loc) · 848 Bytes
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# Tag of the jupyter/pyspark-notebook base image. It is a build argument so the
# Spark version under the notebook stack is selected explicitly instead of
# floating with `latest`, which makes rebuilds non-reproducible.
# NOTE(review): the default assumes delta-spark 1.0.x targets Spark 3.1.x and
# that this tag exists in the registry -- confirm both, and bump this tag
# together with DELTA_CORE_VERSION. Building with
# `--build-arg PYSPARK_NOTEBOOK_TAG=latest` restores the previous behaviour.
ARG PYSPARK_NOTEBOOK_TAG="spark-3.1.2"
FROM jupyter/pyspark-notebook:${PYSPARK_NOTEBOOK_TAG}

# Version of the delta-spark Python package to install; override with
# `--build-arg DELTA_CORE_VERSION=<version>`.
ARG DELTA_CORE_VERSION="1.0.0"

# Install the pinned delta-spark release without keeping pip's download cache,
# then run fix-permissions (a helper expected to come from the base image) on
# the home directory and the conda prefix, in the same layer as the install.
# The requirement specifier is quoted so the expanded build argument always
# reaches pip as a single word.
RUN pip install --quiet --no-cache-dir "delta-spark==${DELTA_CORE_VERSION}" && \
    fix-permissions "${HOME}" && \
    fix-permissions "${CONDA_DIR}"
# Append the Delta Lake SQL extension and catalog settings to Spark's default
# configuration file. The write is done as root (presumably because the Spark
# conf directory is not writable by the notebook user -- verify against the
# base image).
USER root
RUN printf '%s\n' \
        'spark.sql.extensions io.delta.sql.DeltaSparkSessionExtension' \
        'spark.sql.catalog.spark_catalog org.apache.spark.sql.delta.catalog.DeltaCatalog' \
        >> "${SPARK_HOME}/conf/spark-defaults.conf"
# Switch away from root again for the remaining build steps and for runtime;
# NB_UID is expected to be defined by the base image.
USER ${NB_UID}
# Warm-up step: write a three-line PySpark script that builds a SparkSession
# through configure_spark_with_delta_pip, run it once during the build, and
# delete it in the same layer. Creating the session at build time is what
# triggers the download of the Delta Lake files.
RUN printf '%s\n' \
        'from pyspark.sql import SparkSession' \
        'from delta import *' \
        'spark = configure_spark_with_delta_pip(SparkSession.builder).getOrCreate()' \
        > /tmp/init-delta.py && \
    python /tmp/init-delta.py && \
    rm /tmp/init-delta.py