@@ -29,6 +29,21 @@ if [ -z "${SPARK_HOME}" ]; then
 fi
 . "${SPARK_HOME}/bin/load-spark-env.sh"
 
+CTX_DIR="$SPARK_HOME/target/tmp/docker"
+
+function is_dev_build {
+  [ ! -f "$SPARK_HOME/RELEASE" ]
+}
+
+function cleanup_ctx_dir {
+  if is_dev_build; then
+    rm -rf "$CTX_DIR"
+  fi
+}
+
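+# Remove the temporary build contexts when the script exits; only dev builds create them.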
+trap cleanup_ctx_dir EXIT
+
 function image_ref {
   local image="$1"
   local add_repo="${2:-1}"
@@ -41,76 +55,120 @@ function image_ref {
   echo "$image"
 }
 
+# Create a smaller build context for docker in dev builds to make the build faster. Docker
+# uploads all of the current directory to the daemon, and it can get pretty big with dev
+# builds that contain test log files and other artifacts.
+#
+# Three build contexts are created, one for each image: base, pyspark, and sparkr. For them
+# to have the desired effect, the docker command needs to be executed inside the appropriate
+# context directory.
+#
+# Note: docker does not support symlinks in the build context.
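+#
+# For illustration, the layout created below looks roughly like this:
+#   $CTX_DIR/base/{jars,examples,bin,sbin,data,kubernetes/{dockerfiles,tests}}
+#   $CTX_DIR/pyspark/{python/lib,kubernetes/dockerfiles}
+#   $CTX_DIR/sparkr/{R,kubernetes/dockerfiles}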
+function create_dev_build_context {(
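+  # The parentheses run the body in a subshell, so "set -e" does not leak into the caller.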
+  set -e
+  local BASE_CTX="$CTX_DIR/base"
+  mkdir -p "$BASE_CTX/kubernetes"
+  cp -r "resource-managers/kubernetes/docker/src/main/dockerfiles" \
+    "$BASE_CTX/kubernetes/dockerfiles"
+
+  cp -r "assembly/target/scala-$SPARK_SCALA_VERSION/jars" "$BASE_CTX/jars"
+  cp -r "resource-managers/kubernetes/integration-tests/tests" \
+    "$BASE_CTX/kubernetes/tests"
+
+  mkdir "$BASE_CTX/examples"
+  cp -r "examples/src" "$BASE_CTX/examples/src"
+  # Copy just the examples jars that are needed, skipping any already present in the main jars directory.
+  mkdir "$BASE_CTX/examples/jars"
+  for i in examples/target/scala-$SPARK_SCALA_VERSION/jars/*; do
+    if [ ! -f "$BASE_CTX/jars/$(basename "$i")" ]; then
+      cp "$i" "$BASE_CTX/examples/jars"
+    fi
+  done
+
+  for other in bin sbin data; do
+    cp -r "$other" "$BASE_CTX/$other"
+  done
+
+  local PYSPARK_CTX="$CTX_DIR/pyspark"
+  mkdir -p "$PYSPARK_CTX/kubernetes"
+  cp -r "resource-managers/kubernetes/docker/src/main/dockerfiles" \
+    "$PYSPARK_CTX/kubernetes/dockerfiles"
+  mkdir "$PYSPARK_CTX/python"
+  cp -r "python/lib" "$PYSPARK_CTX/python/lib"
+
+  local R_CTX="$CTX_DIR/sparkr"
+  mkdir -p "$R_CTX/kubernetes"
+  cp -r "resource-managers/kubernetes/docker/src/main/dockerfiles" \
+    "$R_CTX/kubernetes/dockerfiles"
+  cp -r "R" "$R_CTX/R"
+)}
+
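+# Echo the docker build context directory for the given image: the per-image
+# context in dev builds, or $SPARK_HOME itself for a distribution.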
+function img_ctx_dir {
+  if is_dev_build; then
+    echo "$CTX_DIR/$1"
+  else
+    echo "$SPARK_HOME"
+  fi
+}
+
 function build {
   local BUILD_ARGS
-  local IMG_PATH
-  local JARS
-
-  if [ ! -f "$SPARK_HOME/RELEASE" ]; then
-    # Set image build arguments accordingly if this is a source repo and not a distribution archive.
-    #
-    # Note that this will copy all of the example jars directory into the image, and that will
-    # contain a lot of duplicated jars with the main Spark directory. In a proper distribution,
-    # the examples directory is cleaned up before generating the distribution tarball, so this
-    # issue does not occur.
-    IMG_PATH=resource-managers/kubernetes/docker/src/main/dockerfiles
-    JARS=assembly/target/scala-$SPARK_SCALA_VERSION/jars
-    BUILD_ARGS=(
-      ${BUILD_PARAMS}
-      --build-arg
-      img_path=$IMG_PATH
-      --build-arg
-      spark_jars=$JARS
-      --build-arg
-      example_jars=examples/target/scala-$SPARK_SCALA_VERSION/jars
-      --build-arg
-      k8s_tests=resource-managers/kubernetes/integration-tests/tests
-    )
-  else
-    # Not passed as arguments to docker, but used to validate the Spark directory.
-    IMG_PATH="kubernetes/dockerfiles"
-    JARS=jars
-    BUILD_ARGS=(${BUILD_PARAMS})
+  local SPARK_ROOT="$SPARK_HOME"
+
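+  # For dev builds, create the trimmed build contexts and validate against the base one.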
+  if is_dev_build; then
+    create_dev_build_context
+    SPARK_ROOT="$CTX_DIR/base"
   fi
 
   # Verify that the Docker image content directory is present
-  if [ ! -d "$IMG_PATH" ]; then
+  if [ ! -d "$SPARK_ROOT/kubernetes/dockerfiles" ]; then
     error "Cannot find docker image. This script must be run from a runnable distribution of Apache Spark."
   fi
 
   # Verify that Spark has actually been built/is a runnable distribution
   # i.e. the Spark JARs that the Docker files will place into the image are present
-  local TOTAL_JARS=$(ls $JARS/spark-* | wc -l)
+  local TOTAL_JARS=$(ls $SPARK_ROOT/jars/spark-* | wc -l)
   TOTAL_JARS=$(( $TOTAL_JARS ))
   if [ "${TOTAL_JARS}" -eq 0 ]; then
     error "Cannot find Spark JARs. This script assumes that Apache Spark has first been built locally or this is a runnable distribution."
   fi
 
+  local BUILD_ARGS=(${BUILD_PARAMS})
   local BINDING_BUILD_ARGS=(
     ${BUILD_PARAMS}
     --build-arg
     base_img=$(image_ref spark)
   )
-  local BASEDOCKERFILE=${BASEDOCKERFILE:-"$IMG_PATH/spark/Dockerfile"}
-  local PYDOCKERFILE=${PYDOCKERFILE:-"$IMG_PATH/spark/bindings/python/Dockerfile"}
-  local RDOCKERFILE=${RDOCKERFILE:-"$IMG_PATH/spark/bindings/R/Dockerfile"}
+  local BASEDOCKERFILE=${BASEDOCKERFILE:-"kubernetes/dockerfiles/spark/Dockerfile"}
+  local PYDOCKERFILE=${PYDOCKERFILE:-"kubernetes/dockerfiles/spark/bindings/python/Dockerfile"}
+  local RDOCKERFILE=${RDOCKERFILE:-"kubernetes/dockerfiles/spark/bindings/R/Dockerfile"}
 
-  docker build $NOCACHEARG "${BUILD_ARGS[@]}" \
+  (cd $(img_ctx_dir base) && docker build $NOCACHEARG "${BUILD_ARGS[@]}" \
     -t $(image_ref spark) \
-    -f "$BASEDOCKERFILE" .
+    -f "$BASEDOCKERFILE" .)
   if [ $? -ne 0 ]; then
     error "Failed to build Spark JVM Docker image, please refer to Docker build output for details."
   fi
 
-  docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \
+  (cd $(img_ctx_dir pyspark) && docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \
     -t $(image_ref spark-py) \
-    -f "$PYDOCKERFILE" .
+    -f "$PYDOCKERFILE" .)
   if [ $? -ne 0 ]; then
     error "Failed to build PySpark Docker image, please refer to Docker build output for details."
   fi
-  docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \
+
+  (cd $(img_ctx_dir sparkr) && docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \
     -t $(image_ref spark-r) \
-    -f "$RDOCKERFILE" .
+    -f "$RDOCKERFILE" .)
   if [ $? -ne 0 ]; then
     error "Failed to build SparkR Docker image, please refer to Docker build output for details."
   fi