 
 # Run test suites and individual test suites.
 #
-# Usage: run-tests [core|sql|mllib|ml|streaming]
+# Usage: run-tests [-v <python version>] [core|sql|mllib|ml|streaming]
 #
 # When you select none, all test suites are run. You can also select
 # multiple test suites.
 
 function usage() {
-    echo "Usage: run-tests [core|sql|mllib|ml|streaming]"
+    echo "Usage: run-tests [-v <python version>] [core|sql|mllib|ml|streaming]"
 }
 
+SUPPORT_PYTHON_VERSIONS="2.6 3.4 pypy"
+TARGET_PYTHON_VERSION=
+if [ $# -ne 0 ] && [ "$1" = "-v" ]; then
+    TARGET_PYTHON_VERSION=$2
+    shift 2
+fi
+
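+# Example invocations (illustrative):
+#   run-tests -v 3.4 core sql   # run only the core and sql suites, with Python 3.4
+#   run-tests mllib             # run the mllib suite with every supported version
+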
 if [ $# == 0 ]; then
     DO_CORE_TESTS=1
     DO_SQL_TESTS=1
@@ -34,33 +42,33 @@
 done
 fi
 
+# Set the directory where this script is stored.
+SCRIPT_DIR="$(cd "`dirname "$0"`"; pwd)"
+
 # Figure out where the Spark framework is installed
 FWDIR="$(cd "`dirname "$0"`"; cd ../; pwd)"
 
@@ -75,177 +86,37 @@ FWDIR="$(cd "`dirname "$0"`"; cd ../; pwd)"
 # CD into the python directory to find things on the right path
 cd "$FWDIR/python"
 
-FAILED=0
 LOG_FILE=unit-tests.log
-START=$(date +"%s")
 
 rm -f $LOG_FILE
 
-# Remove the metastore and warehouse directory created by the HiveContext tests in Spark SQL
-rm -rf metastore warehouse
-
-function run_test() {
-    echo -en "Running test: $1 ... " | tee -a $LOG_FILE
-    start=$(date +"%s")
-    SPARK_TESTING=1 time "$FWDIR"/bin/pyspark $1 > $LOG_FILE 2>&1
-
-    FAILED=$((PIPESTATUS[0]||$FAILED))
-
-    # Fail and exit on the first test failure.
-    if [[ $FAILED != 0 ]]; then
-        cat $LOG_FILE | grep -v "^[0-9][0-9]*" # filter all lines starting with a number.
-        echo -en "\033[31m" # Red
-        echo "Had test failures; see logs."
-        echo -en "\033[0m" # No color
-        exit -1
-    else
-        now=$(date +"%s")
-        echo "ok ($(($now - $start))s)"
-    fi
-}
-
-function run_core_tests() {
-    if [ $DO_CORE_TESTS == 0 ]; then
-        return 0
-    fi
-
-    echo "Run core tests ..."
-    run_test "pyspark.rdd"
-    run_test "pyspark.context"
-    run_test "pyspark.conf"
-    run_test "pyspark.broadcast"
-    run_test "pyspark.accumulators"
-    run_test "pyspark.serializers"
-    run_test "pyspark.profiler"
-    run_test "pyspark.shuffle"
-    run_test "pyspark.tests"
-}
+echo "Running PySpark tests. Output is in python/$LOG_FILE."
 
-function run_sql_tests() {
-    if [ $DO_SQL_TESTS == 0 ]; then
-        return 0
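+# If a specific version was requested with -v, make sure it is supported, then
+# restrict the run to just that version. The padding spaces make this a
+# whole-word match (so "-v 3" does not match "3.4").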
+if [ -n "$TARGET_PYTHON_VERSION" ]; then
+    if [[ ! " $SUPPORT_PYTHON_VERSIONS " =~ " $TARGET_PYTHON_VERSION " ]]; then
+        echo "Python $TARGET_PYTHON_VERSION is not supported."
+        exit 1
     fi
 
-    echo "Run sql tests ..."
-    run_test "pyspark.sql.types"
-    run_test "pyspark.sql.context"
-    run_test "pyspark.sql.column"
-    run_test "pyspark.sql.dataframe"
-    run_test "pyspark.sql.group"
-    run_test "pyspark.sql.functions"
-    run_test "pyspark.sql.readwriter"
-    run_test "pyspark.sql.window"
-    run_test "pyspark.sql.tests"
-}
+    SUPPORT_PYTHON_VERSIONS=$TARGET_PYTHON_VERSION
+fi
 
-function run_mllib_tests() {
-    if [ $DO_MLLIB_TESTS == 0 ]; then
-        return 0
-    fi
+for PYVERSION in $SUPPORT_PYTHON_VERSIONS; do
+    # Remove the metastore and warehouse directory created by the HiveContext tests in Spark SQL
+    rm -rf metastore warehouse
 
-    echo "Run mllib tests ..."
-    run_test "pyspark.mllib.classification"
-    run_test "pyspark.mllib.clustering"
-    run_test "pyspark.mllib.evaluation"
-    run_test "pyspark.mllib.feature"
-    run_test "pyspark.mllib.fpm"
-    run_test "pyspark.mllib.linalg"
-    run_test "pyspark.mllib.random"
-    run_test "pyspark.mllib.recommendation"
-    run_test "pyspark.mllib.regression"
-    run_test "pyspark.mllib.stat._statistics"
-    run_test "pyspark.mllib.stat.KernelDensity"
-    run_test "pyspark.mllib.tree"
-    run_test "pyspark.mllib.util"
-    run_test "pyspark.mllib.tests"
-}
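+    # Reset the timer and the failure flag before running this version's suites.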
+    START=$(date +"%s")
+    FAILED=0
 
-function run_ml_tests() {
-    if [ $DO_ML_TESTS == 0 ]; then
-        return 0
+    if [ -f "${SCRIPT_DIR}/tests/$PYVERSION.sh" ]; then
+        . "${SCRIPT_DIR}/tests/$PYVERSION.sh"
+    else
+        . "${SCRIPT_DIR}/tests/default" "$PYVERSION"
     fi
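+    # NOTE: the sourced tests/<version>.sh (or tests/default, which receives the
+    # version tag as $1) is expected to pick the matching interpreter, run the
+    # selected suites, and update FAILED on any failure.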
 
-    echo "Run ml tests ..."
-    run_test "pyspark.ml.feature"
-    run_test "pyspark.ml.classification"
-    run_test "pyspark.ml.recommendation"
-    run_test "pyspark.ml.regression"
-    run_test "pyspark.ml.tuning"
-    run_test "pyspark.ml.tests"
-    run_test "pyspark.ml.evaluation"
-}
-
-function run_streaming_tests() {
-    if [ $DO_STREAMING_TESTS == 0 ]; then
-        return 0
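+    # Print a green summary when every suite passed for this version.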
+    if [[ $FAILED == 0 ]]; then
+        now=$(date +"%s")
+        echo -e "\033[32mTests passed \033[0min $(($now - $START)) seconds"
     fi
-
-    echo "Run streaming tests ..."
-
-    KAFKA_ASSEMBLY_DIR="$FWDIR"/external/kafka-assembly
-    JAR_PATH="${KAFKA_ASSEMBLY_DIR}/target/scala-${SPARK_SCALA_VERSION}"
-    for f in "${JAR_PATH}"/spark-streaming-kafka-assembly-*.jar; do
-        if [[ ! -e "$f" ]]; then
-            echo "Failed to find Spark Streaming Kafka assembly jar in $KAFKA_ASSEMBLY_DIR" 1>&2
-            echo "You need to build Spark with " \
-                "'build/sbt assembly/assembly streaming-kafka-assembly/assembly' or" \
-                "'build/mvn package' before running this program" 1>&2
-            exit 1
-        fi
-        KAFKA_ASSEMBLY_JAR="$f"
-    done
-
-    export PYSPARK_SUBMIT_ARGS="--jars ${KAFKA_ASSEMBLY_JAR} pyspark-shell"
-    run_test "pyspark.streaming.util"
-    run_test "pyspark.streaming.tests"
-}
-
-echo "Running PySpark tests. Output is in python/$LOG_FILE."
-
-export PYSPARK_PYTHON="python"
-
-# Try to test with Python 2.6, since that's the minimum version that we support:
-if [ $(which python2.6) ]; then
-    export PYSPARK_PYTHON="python2.6"
-fi
-
-echo "Testing with Python version:"
-$PYSPARK_PYTHON --version
-
-run_core_tests
-run_sql_tests
-run_mllib_tests
-run_ml_tests
-run_streaming_tests
-
-# Try to test with Python 3
-if [ $(which python3.4) ]; then
-    export PYSPARK_PYTHON="python3.4"
-    echo "Testing with Python3.4 version:"
-    $PYSPARK_PYTHON --version
-
-    run_core_tests
-    run_sql_tests
-    run_mllib_tests
-    run_ml_tests
-    run_streaming_tests
-fi
-
-# Try to test with PyPy
-if [ $(which pypy) ]; then
-    export PYSPARK_PYTHON="pypy"
-    echo "Testing with PyPy version:"
-    $PYSPARK_PYTHON --version
-
-    run_core_tests
-    run_sql_tests
-    run_streaming_tests
-fi
-
-if [[ $FAILED == 0 ]]; then
-    now=$(date +"%s")
-    echo -e "\033[32mTests passed \033[0min $(($now - $START)) seconds"
-fi
-
-# TODO: in the long-run, it would be nice to use a test runner like `nose`.
-# The doctest fixtures are the current barrier to doing this.
+done
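
For reference, a minimal sketch of what a per-version helper such as tests/default
could look like. This is illustrative only: the tests/*.sh files are not part of
this diff, and the interpreter-selection logic shown is an assumption based on how
the old script chose python2.6, python3.4, and pypy.

    #!/usr/bin/env bash
    # Hypothetical tests/default, sourced as: . tests/default <version>
    # Maps the version tag to an interpreter and exports PYSPARK_PYTHON for
    # the test suites that run afterwards.

    PYVERSION=$1
    if [ "$PYVERSION" = "pypy" ]; then
        candidate="pypy"
    else
        candidate="python$PYVERSION"
    fi

    # Skip this version quietly when the interpreter is not installed.
    if ! which "$candidate" > /dev/null 2>&1; then
        echo "Skipping $candidate: not installed."
        return 0    # plain return works because this file is sourced
    fi

    export PYSPARK_PYTHON="$candidate"
    echo "Testing with Python version:"
    $PYSPARK_PYTHON --version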