@@ -29,6 +29,22 @@ if [ -z "${SPARK_HOME}" ]; then
 fi
 . "${SPARK_HOME}/bin/load-spark-env.sh"
 
+CTX_DIR="$SPARK_HOME/target/tmp/docker"
+
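+# A dev build runs from a source checkout rather than an unpacked distribution;
+# only distributions ship the RELEASE marker file.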
+function is_dev_build {
+  [ ! -f "$SPARK_HOME/RELEASE" ]
+}
+
+function cleanup_ctx_dir {
+  if is_dev_build; then
+    rm -rf "$CTX_DIR"
+  fi
+}
+
+trap cleanup_ctx_dir EXIT
+
 function image_ref {
   local image="$1"
   local add_repo="${2:-1}"
@@ -53,80 +69,125 @@ function docker_push {
   fi
 }
 
+# Create a smaller build context for docker in dev builds to make the build faster. Docker
+# uploads all of the current directory to the daemon, and it can get pretty big with dev
+# builds that contain test log files and other artifacts.
+#
+# Three build contexts are created, one for each image: base, pyspark, and sparkr. For them
+# to have the desired effect, the docker command needs to be executed inside the appropriate
+# context directory.
+#
+# Note: docker does not support symlinks in the build context.
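+#
+# The resulting layout is, roughly:
+#   $CTX_DIR/base    -> bin, sbin, data, jars, examples, kubernetes/{dockerfiles,tests}
+#   $CTX_DIR/pyspark -> python/lib, kubernetes/dockerfiles
+#   $CTX_DIR/sparkr  -> R, kubernetes/dockerfiles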
+function create_dev_build_context {(
+  set -e
+  local BASE_CTX="$CTX_DIR/base"
+  mkdir -p "$BASE_CTX/kubernetes"
+  cp -r "resource-managers/kubernetes/docker/src/main/dockerfiles" \
+    "$BASE_CTX/kubernetes/dockerfiles"
+
+  cp -r "assembly/target/scala-$SPARK_SCALA_VERSION/jars" "$BASE_CTX/jars"
+  cp -r "resource-managers/kubernetes/integration-tests/tests" \
+    "$BASE_CTX/kubernetes/tests"
+
+  mkdir "$BASE_CTX/examples"
+  cp -r "examples/src" "$BASE_CTX/examples/src"
+  # Copy just needed examples jars instead of everything.
+  mkdir "$BASE_CTX/examples/jars"
+  for i in examples/target/scala-$SPARK_SCALA_VERSION/jars/*; do
+    if [ ! -f "$BASE_CTX/jars/$(basename $i)" ]; then
+      cp $i "$BASE_CTX/examples/jars"
+    fi
+  done
+
+  for other in bin sbin data; do
+    cp -r "$other" "$BASE_CTX/$other"
+  done
+
+  local PYSPARK_CTX="$CTX_DIR/pyspark"
+  mkdir -p "$PYSPARK_CTX/kubernetes"
+  cp -r "resource-managers/kubernetes/docker/src/main/dockerfiles" \
+    "$PYSPARK_CTX/kubernetes/dockerfiles"
+  mkdir "$PYSPARK_CTX/python"
+  cp -r "python/lib" "$PYSPARK_CTX/python/lib"
+
+  local R_CTX="$CTX_DIR/sparkr"
+  mkdir -p "$R_CTX/kubernetes"
+  cp -r "resource-managers/kubernetes/docker/src/main/dockerfiles" \
+    "$R_CTX/kubernetes/dockerfiles"
+  cp -r "R" "$R_CTX/R"
+)}
+
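+# Echo the docker build context directory for the given image: the trimmed context
+# from create_dev_build_context for dev builds, $SPARK_HOME itself for distributions.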
+function img_ctx_dir {
+  if is_dev_build; then
+    echo "$CTX_DIR/$1"
+  else
+    echo "$SPARK_HOME"
+  fi
+}
+
 function build {
   local BUILD_ARGS
-  local IMG_PATH
-  local JARS
-
-  if [ ! -f "$SPARK_HOME/RELEASE" ]; then
-    # Set image build arguments accordingly if this is a source repo and not a distribution archive.
-    #
-    # Note that this will copy all of the example jars directory into the image, and that will
-    # contain a lot of duplicated jars with the main Spark directory. In a proper distribution,
-    # the examples directory is cleaned up before generating the distribution tarball, so this
-    # issue does not occur.
-    IMG_PATH=resource-managers/kubernetes/docker/src/main/dockerfiles
-    JARS=assembly/target/scala-$SPARK_SCALA_VERSION/jars
-    BUILD_ARGS=(
-      ${BUILD_PARAMS}
-      --build-arg
-      img_path=$IMG_PATH
-      --build-arg
-      spark_jars=$JARS
-      --build-arg
-      example_jars=examples/target/scala-$SPARK_SCALA_VERSION/jars
-      --build-arg
-      k8s_tests=resource-managers/kubernetes/integration-tests/tests
-    )
-  else
-    # Not passed as arguments to docker, but used to validate the Spark directory.
-    IMG_PATH="kubernetes/dockerfiles"
-    JARS=jars
-    BUILD_ARGS=(${BUILD_PARAMS})
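+  # SPARK_ROOT is only used to validate the image content below; in dev builds it
+  # points at the generated base context instead of the source tree.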
+  local SPARK_ROOT="$SPARK_HOME"
+
+  if is_dev_build; then
+    create_dev_build_context || error "Failed to create docker build context."
+    SPARK_ROOT="$CTX_DIR/base"
   fi
 
   # Verify that the Docker image content directory is present
-  if [ ! -d "$IMG_PATH" ]; then
+  if [ ! -d "$SPARK_ROOT/kubernetes/dockerfiles" ]; then
     error "Cannot find docker image. This script must be run from a runnable distribution of Apache Spark."
   fi
 
   # Verify that Spark has actually been built/is a runnable distribution
   # i.e. the Spark JARs that the Docker files will place into the image are present
-  local TOTAL_JARS=$(ls $JARS/spark-* | wc -l)
+  local TOTAL_JARS=$(ls $SPARK_ROOT/jars/spark-* | wc -l)
   TOTAL_JARS=$(( $TOTAL_JARS ))
   if [ "${TOTAL_JARS}" -eq 0 ]; then
     error "Cannot find Spark JARs. This script assumes that Apache Spark has first been built locally or this is a runnable distribution."
   fi
 
+  local BUILD_ARGS=(${BUILD_PARAMS})
   local BINDING_BUILD_ARGS=(
     ${BUILD_PARAMS}
     --build-arg
     base_img=$(image_ref spark)
   )
-  local BASEDOCKERFILE=${BASEDOCKERFILE:-"$IMG_PATH/spark/Dockerfile"}
+  local BASEDOCKERFILE=${BASEDOCKERFILE:-"kubernetes/dockerfiles/spark/Dockerfile"}
   local PYDOCKERFILE=${PYDOCKERFILE:-false}
   local RDOCKERFILE=${RDOCKERFILE:-false}
 
-  docker build $NOCACHEARG "${BUILD_ARGS[@]}" \
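+  # Run each docker build from the image's own context dir (see img_ctx_dir), since
+  # docker uploads the whole current directory to the daemon.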
+  (cd $(img_ctx_dir base) && docker build $NOCACHEARG "${BUILD_ARGS[@]}" \
     -t $(image_ref spark) \
-    -f "$BASEDOCKERFILE" .
+    -f "$BASEDOCKERFILE" .)
   if [ $? -ne 0 ]; then
     error "Failed to build Spark JVM Docker image, please refer to Docker build output for details."
   fi
 
   if [ "${PYDOCKERFILE}" != "false" ]; then
-    docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \
+    (cd $(img_ctx_dir pyspark) && docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \
       -t $(image_ref spark-py) \
-      -f "$PYDOCKERFILE" .
+      -f "$PYDOCKERFILE" .)
     if [ $? -ne 0 ]; then
       error "Failed to build PySpark Docker image, please refer to Docker build output for details."
     fi
   fi
 
   if [ "${RDOCKERFILE}" != "false" ]; then
-    docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \
+    (cd $(img_ctx_dir sparkr) && docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \
       -t $(image_ref spark-r) \
-      -f "$RDOCKERFILE" .
+      -f "$RDOCKERFILE" .)
     if [ $? -ne 0 ]; then
       error "Failed to build SparkR Docker image, please refer to Docker build output for details."
     fi