Commit 5aa5aa5

Split run-tests into the main script and runner scripts for a particular Python version
1 parent 0421f60 commit 5aa5aa5

4 files changed: 238 additions & 163 deletions
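
The new usage string and the SUPPORT_PYTHON_VERSIONS list in the diff below imply two modes: sweep every supported interpreter, or target a single one with -v. A hypothetical invocation from the Spark checkout (flag and suite names are taken from the diff; the exact CLI behavior is an inference):

    # Run only the core and sql suites under Python 3.4
    ./python/run-tests -v 3.4 core sql

    # Run all suites under all supported versions (2.6, 3.4, pypy)
    ./python/run-tests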

python/run-tests

Lines changed: 34 additions & 163 deletions
@@ -19,15 +19,23 @@
 
 # Run test suites and individual test suite.
 #
-# Usage: run-tests [core|sql|mllib|ml|streaming]
+# Usage: run-tests [-v python version] [core|sql|mllib|ml|streaming]
 #
 # When you select none, all test suites are run. You can also select
 # multiple test suites.
 
 function usage() {
-  echo "Usage: run-tests [core|sql|mllib|ml|streaming]"
+  echo "Usage: run-tests [-v python version] [core|sql|mllib|ml|streaming]"
 }
 
+SUPPORT_PYTHON_VERSIONS="2.6 3.4 pypy"
+TARGET_PYTHON_VERSION=
+if [ $# != 0 -a $1 == "-v" ]; then
+  TARGET_PYTHON_VERSION=$2
+  shift
+  shift
+fi
+
 if [ $# == 0 ]; then
   DO_CORE_TESTS=1
   DO_SQL_TESTS=1
@@ -67,6 +75,9 @@ else
   done
 fi
 
+# Set the directory that this script is stored in.
+SCRIPT_DIR="$(cd "`dirname "$0"`"; pwd)"
+
 # Figure out where the Spark framework is installed
 FWDIR="$(cd "`dirname "$0"`"; cd ../; pwd)"
 
@@ -75,177 +86,37 @@ FWDIR="$(cd "`dirname "$0"`"; cd ../; pwd)"
 
 # CD into the python directory to find things on the right path
 cd "$FWDIR/python"
 
-FAILED=0
 LOG_FILE=unit-tests.log
-START=$(date +"%s")
 
 rm -f $LOG_FILE
 
-# Remove the metastore and warehouse directory created by the HiveContext tests in Spark SQL
-rm -rf metastore warehouse
-
-function run_test() {
-  echo -en "Running test: $1 ... " | tee -a $LOG_FILE
-  start=$(date +"%s")
-  SPARK_TESTING=1 time "$FWDIR"/bin/pyspark $1 > $LOG_FILE 2>&1
-
-  FAILED=$((PIPESTATUS[0]||$FAILED))
-
-  # Fail and exit on the first test failure.
-  if [[ $FAILED != 0 ]]; then
-    cat $LOG_FILE | grep -v "^[0-9][0-9]*" # filter all lines starting with a number.
-    echo -en "\033[31m" # Red
-    echo "Had test failures; see logs."
-    echo -en "\033[0m" # No color
-    exit -1
-  else
-    now=$(date +"%s")
-    echo "ok ($(($now - $start))s)"
-  fi
-}
-
-function run_core_tests() {
-  if [ $DO_CORE_TESTS == 0 ]; then
-    return 0
-  fi
-
-  echo "Run core tests ..."
-  run_test "pyspark.rdd"
-  run_test "pyspark.context"
-  run_test "pyspark.conf"
-  run_test "pyspark.broadcast"
-  run_test "pyspark.accumulators"
-  run_test "pyspark.serializers"
-  run_test "pyspark.profiler"
-  run_test "pyspark.shuffle"
-  run_test "pyspark.tests"
-}
+echo "Running PySpark tests. Output is in python/$LOG_FILE."
 
-function run_sql_tests() {
-  if [ $DO_SQL_TESTS == 0 ]; then
-    return 0
+if [ -n "$TARGET_PYTHON_VERSION" ]; then
+  if [[ ! "$SUPPORT_PYTHON_VERSIONS" =~ "$TARGET_PYTHON_VERSION" ]]; then
+    echo "python $TARGET_PYTHON_VERSION is not supported."
+    exit 1
   fi
 
-  echo "Run sql tests ..."
-  run_test "pyspark.sql.types"
-  run_test "pyspark.sql.context"
-  run_test "pyspark.sql.column"
-  run_test "pyspark.sql.dataframe"
-  run_test "pyspark.sql.group"
-  run_test "pyspark.sql.functions"
-  run_test "pyspark.sql.readwriter"
-  run_test "pyspark.sql.window"
-  run_test "pyspark.sql.tests"
-}
+  SUPPORT_PYTHON_VERSIONS=$TARGET_PYTHON_VERSION
+fi
 
-function run_mllib_tests() {
-  if [ $DO_MLLIB_TESTS == 0 ]; then
-    return 0
-  fi
+for PYVERSION in $SUPPORT_PYTHON_VERSIONS;
+do
+  # Remove the metastore and warehouse directory created by the HiveContext tests in Spark SQL
+  rm -rf metastore warehouse
 
-  echo "Run mllib tests ..."
-  run_test "pyspark.mllib.classification"
-  run_test "pyspark.mllib.clustering"
-  run_test "pyspark.mllib.evaluation"
-  run_test "pyspark.mllib.feature"
-  run_test "pyspark.mllib.fpm"
-  run_test "pyspark.mllib.linalg"
-  run_test "pyspark.mllib.random"
-  run_test "pyspark.mllib.recommendation"
-  run_test "pyspark.mllib.regression"
-  run_test "pyspark.mllib.stat._statistics"
-  run_test "pyspark.mllib.stat.KernelDensity"
-  run_test "pyspark.mllib.tree"
-  run_test "pyspark.mllib.util"
-  run_test "pyspark.mllib.tests"
-}
+  START=$(date +"%s")
+  FAILED=0
 
-function run_ml_tests() {
-  if [ $DO_ML_TESTS == 0 ]; then
-    return 0
+  if [ -f "${SCRIPT_DIR}/tests/$PYVERSION.sh" ]; then
+    . "${SCRIPT_DIR}/tests/$PYVERSION.sh"
+  else
+    . "${SCRIPT_DIR}/tests/default" $PYVERSION
   fi
 
-  echo "Run ml tests ..."
-  run_test "pyspark.ml.feature"
-  run_test "pyspark.ml.classification"
-  run_test "pyspark.ml.recommendation"
-  run_test "pyspark.ml.regression"
-  run_test "pyspark.ml.tuning"
-  run_test "pyspark.ml.tests"
-  run_test "pyspark.ml.evaluation"
-}
-
-function run_streaming_tests() {
-  if [ $DO_STREAMING_TESTS == 0 ]; then
-    return 0
+  if [[ $FAILED == 0 ]]; then
+    now=$(date +"%s")
+    echo -e "\033[32mTests passed \033[0min $(($now - $START)) seconds"
   fi
-
-  echo "Run streaming tests ..."
-
-  KAFKA_ASSEMBLY_DIR="$FWDIR"/external/kafka-assembly
-  JAR_PATH="${KAFKA_ASSEMBLY_DIR}/target/scala-${SPARK_SCALA_VERSION}"
-  for f in "${JAR_PATH}"/spark-streaming-kafka-assembly-*.jar; do
-    if [[ ! -e "$f" ]]; then
-      echo "Failed to find Spark Streaming Kafka assembly jar in $KAFKA_ASSEMBLY_DIR" 1>&2
-      echo "You need to build Spark with " \
-        "'build/sbt assembly/assembly streaming-kafka-assembly/assembly' or" \
-        "'build/mvn package' before running this program" 1>&2
-      exit 1
-    fi
-    KAFKA_ASSEMBLY_JAR="$f"
-  done
-
-  export PYSPARK_SUBMIT_ARGS="--jars ${KAFKA_ASSEMBLY_JAR} pyspark-shell"
-  run_test "pyspark.streaming.util"
-  run_test "pyspark.streaming.tests"
-}
-
-echo "Running PySpark tests. Output is in python/$LOG_FILE."
-
-export PYSPARK_PYTHON="python"
-
-# Try to test with Python 2.6, since that's the minimum version that we support:
-if [ $(which python2.6) ]; then
-  export PYSPARK_PYTHON="python2.6"
-fi
-
-echo "Testing with Python version:"
-$PYSPARK_PYTHON --version
-
-run_core_tests
-run_sql_tests
-run_mllib_tests
-run_ml_tests
-run_streaming_tests
-
-# Try to test with Python 3
-if [ $(which python3.4) ]; then
-  export PYSPARK_PYTHON="python3.4"
-  echo "Testing with Python3.4 version:"
-  $PYSPARK_PYTHON --version
-
-  run_core_tests
-  run_sql_tests
-  run_mllib_tests
-  run_ml_tests
-  run_streaming_tests
-fi
-
-# Try to test with PyPy
-if [ $(which pypy) ]; then
-  export PYSPARK_PYTHON="pypy"
-  echo "Testing with PyPy version:"
-  $PYSPARK_PYTHON --version
-
-  run_core_tests
-  run_sql_tests
-  run_streaming_tests
-fi
-
-if [[ $FAILED == 0 ]]; then
-  now=$(date +"%s")
-  echo -e "\033[32mTests passed \033[0min $(($now - $START)) seconds"
-fi
-
-# TODO: in the long-run, it would be nice to use a test runner like `nose`.
-# The doctest fixtures are the current barrier to doing this.
+done
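
The runner scripts sourced by the loop above (tests/default and any version-specific tests/<version>.sh) are among the four changed files but do not appear in this view. Below is a minimal sketch of what tests/default plausibly contains, inferred from run-tests sourcing it with the version string as $1 and from the suite functions defined in tests/common further down; the actual committed file may differ:

    # Hypothetical sketch of python/tests/default -- not shown in this diff.
    # Sourced by run-tests as: . "${SCRIPT_DIR}/tests/default" $PYVERSION
    . "${SCRIPT_DIR}/tests/common"

    # Pick the interpreter for this pass, e.g. python2.6 or python3.4.
    export PYSPARK_PYTHON="python$1"
    if [ $(which $PYSPARK_PYTHON) ]; then
      echo "Testing with Python version:"
      $PYSPARK_PYTHON --version

      run_core_tests
      run_sql_tests
      run_mllib_tests
      run_ml_tests
      run_streaming_tests
    fi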

python/tests/common

Lines changed: 133 additions & 0 deletions
@@ -0,0 +1,133 @@
+# -*- coding: utf-8 -*-
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+function run_test() {
+  echo -en "Running test: $1 ... " | tee -a $LOG_FILE
+  start=$(date +"%s")
+  SPARK_TESTING=1 time "$FWDIR"/bin/pyspark $1 > $LOG_FILE 2>&1
+
+  FAILED=$((PIPESTATUS[0]||$FAILED))
+
+  # Fail and exit on the first test failure.
+  if [[ $FAILED != 0 ]]; then
+    cat $LOG_FILE | grep -v "^[0-9][0-9]*" # filter all lines starting with a number.
+    echo -en "\033[31m" # Red
+    echo "Had test failures; see logs."
+    echo -en "\033[0m" # No color
+    exit -1
+  else
+    now=$(date +"%s")
+    echo "ok ($(($now - $start))s)"
+  fi
+}
+
+function run_core_tests() {
+  if [ $DO_CORE_TESTS == 0 ]; then
+    return 0
+  fi
+
+  echo "Run core tests ..."
+  run_test "pyspark.rdd"
+  run_test "pyspark.context"
+  run_test "pyspark.conf"
+  run_test "pyspark.broadcast"
+  run_test "pyspark.accumulators"
+  run_test "pyspark.serializers"
+  run_test "pyspark.profiler"
+  run_test "pyspark.shuffle"
+  run_test "pyspark.tests"
+}
+
+function run_sql_tests() {
+  if [ $DO_SQL_TESTS == 0 ]; then
+    return 0
+  fi
+
+  echo "Run sql tests ..."
+  run_test "pyspark.sql.types"
+  run_test "pyspark.sql.context"
+  run_test "pyspark.sql.column"
+  run_test "pyspark.sql.dataframe"
+  run_test "pyspark.sql.group"
+  run_test "pyspark.sql.functions"
+  run_test "pyspark.sql.readwriter"
+  run_test "pyspark.sql.window"
+  run_test "pyspark.sql.tests"
+}
+
+function run_mllib_tests() {
+  if [ $DO_MLLIB_TESTS == 0 ]; then
+    return 0
+  fi
+
+  echo "Run mllib tests ..."
+  run_test "pyspark.mllib.classification"
+  run_test "pyspark.mllib.clustering"
+  run_test "pyspark.mllib.evaluation"
+  run_test "pyspark.mllib.feature"
+  run_test "pyspark.mllib.fpm"
+  run_test "pyspark.mllib.linalg"
+  run_test "pyspark.mllib.random"
+  run_test "pyspark.mllib.recommendation"
+  run_test "pyspark.mllib.regression"
+  run_test "pyspark.mllib.stat._statistics"
+  run_test "pyspark.mllib.stat.KernelDensity"
+  run_test "pyspark.mllib.tree"
+  run_test "pyspark.mllib.util"
+  run_test "pyspark.mllib.tests"
+}
+
+function run_ml_tests() {
+  if [ $DO_ML_TESTS == 0 ]; then
+    return 0
+  fi
+
+  echo "Run ml tests ..."
+  run_test "pyspark.ml.feature"
+  run_test "pyspark.ml.classification"
+  run_test "pyspark.ml.recommendation"
+  run_test "pyspark.ml.regression"
+  run_test "pyspark.ml.tuning"
+  run_test "pyspark.ml.tests"
+  run_test "pyspark.ml.evaluation"
+}
+
+function run_streaming_tests() {
+  if [ $DO_STREAMING_TESTS == 0 ]; then
+    return 0
+  fi
+
+  echo "Run streaming tests ..."
+
+  KAFKA_ASSEMBLY_DIR="$FWDIR"/external/kafka-assembly
+  JAR_PATH="${KAFKA_ASSEMBLY_DIR}/target/scala-${SPARK_SCALA_VERSION}"
+  for f in "${JAR_PATH}"/spark-streaming-kafka-assembly-*.jar; do
+    if [[ ! -e "$f" ]]; then
+      echo "Failed to find Spark Streaming Kafka assembly jar in $KAFKA_ASSEMBLY_DIR" 1>&2
+      echo "You need to build Spark with " \
+        "'build/sbt assembly/assembly streaming-kafka-assembly/assembly' or" \
+        "'build/mvn package' before running this program" 1>&2
+      exit 1
+    fi
+    KAFKA_ASSEMBLY_JAR="$f"
+  done
+
+  export PYSPARK_SUBMIT_ARGS="--jars ${KAFKA_ASSEMBLY_JAR} pyspark-shell"
+  run_test "pyspark.streaming.util"
+  run_test "pyspark.streaming.tests"
+}
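
Note that these functions deliberately read state set by their caller: LOG_FILE, FWDIR, FAILED, and the DO_*_TESTS flags all come from run-tests, so this file only works when sourced, never executed directly. Since the old monolithic script ran only the core, sql, and streaming suites under PyPy, a version-specific runner such as tests/pypy.sh (also not shown in this view) could plausibly preserve that behavior. The following is an assumed sketch, not the committed file:

    # Hypothetical sketch of python/tests/pypy.sh -- not shown in this diff.
    . "${SCRIPT_DIR}/tests/common"

    export PYSPARK_PYTHON="pypy"
    if [ $(which pypy) ]; then
      echo "Testing with PyPy version:"
      $PYSPARK_PYTHON --version

      # The old script skipped the mllib and ml suites under PyPy.
      run_core_tests
      run_sql_tests
      run_streaming_tests
    fi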
