
Commit 7edbc98

Merge remote-tracking branch 'apache-github/master' into state-cleanup
2 parents: 6c9dcf6 + d666053

File tree: 231 files changed (+4171, -740 lines)

.gitignore
Lines changed: 1 addition & 0 deletions

@@ -47,3 +47,4 @@ spark-*-bin.tar.gz
 unit-tests.log
 /lib/
 rat-results.txt
+scalastyle.txt

.travis.yml
Lines changed: 32 additions & 0 deletions

@@ -0,0 +1,32 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+language: scala
+scala:
+  - "2.10.3"
+jdk:
+  - oraclejdk7
+env:
+  matrix:
+    - TEST="scalastyle assembly/assembly"
+    - TEST="catalyst/test sql/test streaming/test mllib/test graphx/test bagel/test"
+    - TEST=hive/test
+cache:
+  directories:
+    - $HOME/.m2
+    - $HOME/.ivy2
+    - $HOME/.sbt
+script:
+  - "sbt ++$TRAVIS_SCALA_VERSION $TEST"

bin/compute-classpath.sh
Lines changed: 0 additions & 1 deletion

@@ -36,7 +36,6 @@ CLASSPATH="$SPARK_CLASSPATH:$FWDIR/conf"
 # Hopefully we will find a way to avoid uber-jars entirely and deploy only the needed packages in
 # the future.
 if [ -f "$FWDIR"/sql/hive/target/scala-$SCALA_VERSION/spark-hive-assembly-*.jar ]; then
-  echo "Hive assembly found, including hive support. If this isn't desired run sbt hive/clean."
 
   # Datanucleus jars do not work if only included in the uberjar as plugin.xml metadata is lost.
   DATANUCLEUSJARS=$(JARS=("$FWDIR/lib_managed/jars"/datanucleus-*.jar); IFS=:; echo "${JARS[*]}")
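Aside: the surviving DATANUCLEUSJARS line uses a compact bash idiom worth knowing — a glob is captured into an array and joined with colons via IFS, all inside a command substitution so the IFS override never leaks into the rest of the script. A standalone sketch (the directory and jar names are illustrative, not from this repo):

# Sketch: join all files matching a glob into one colon-separated classpath
# fragment. The $( ... ) subshell confines the IFS change.
CP_FRAGMENT=$(JARS=(/opt/demo/lib/datanucleus-*.jar); IFS=:; echo "${JARS[*]}")
echo "$CP_FRAGMENT"   # e.g. /opt/demo/lib/datanucleus-api.jar:/opt/demo/lib/datanucleus-core.jar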

bin/spark-shell
Lines changed: 168 additions & 58 deletions

@@ -30,67 +30,189 @@ esac
 # Enter posix mode for bash
 set -o posix
 
-CORE_PATTERN="^[0-9]+$"
-MEM_PATTERN="^[0-9]+[m|g|M|G]$"
-
+## Global script variables
 FWDIR="$(cd `dirname $0`/..; pwd)"
 
-if [ "$1" = "--help" ] || [ "$1" = "-h" ]; then
-  echo "Usage: spark-shell [OPTIONS]"
-  echo "OPTIONS:"
-  echo "-c --cores num, the maximum number of cores to be used by the spark shell"
-  echo "-em --execmem num[m|g], the memory used by each executor of spark shell"
-  echo "-dm --drivermem num[m|g], the memory used by the spark shell and driver"
-  echo "-h --help, print this help information"
-  exit
-fi
+SPARK_REPL_OPTS="${SPARK_REPL_OPTS:-""}"
+DEFAULT_MASTER="local"
+MASTER=${MASTER:-""}
+
+info_log=0
+
+#CLI Color Templates
+txtund=$(tput sgr 0 1)          # Underline
+txtbld=$(tput bold)             # Bold
+bldred=${txtbld}$(tput setaf 1) # red
+bldyel=${txtbld}$(tput setaf 3) # yellow
+bldblu=${txtbld}$(tput setaf 4) # blue
+bldwht=${txtbld}$(tput setaf 7) # white
+txtrst=$(tput sgr0)             # Reset
+info=${bldwht}*${txtrst}        # Feedback
+pass=${bldblu}*${txtrst}
+warn=${bldred}*${txtrst}
+ques=${bldblu}?${txtrst}
+
+# Helper function to describe the script usage
+function usage() {
+    cat << EOF
+${txtbld}Usage${txtrst}: spark-shell [OPTIONS]
+
+${txtbld}OPTIONS${txtrst}:
+    -h  --help              : Print this help information.
+    -c  --cores             : The maximum number of cores to be used by the Spark Shell.
+    -em --executor-memory   : The memory used by each executor of the Spark Shell, the number
+                              is followed by m for megabytes or g for gigabytes, e.g. "1g".
+    -dm --driver-memory     : The memory used by the Spark Shell, the number is followed
+                              by m for megabytes or g for gigabytes, e.g. "1g".
+    -m  --master            : A full string that describes the Spark Master, defaults to "local"
+                              e.g. "spark://localhost:7077".
+    --log-conf              : Enables logging of the supplied SparkConf as INFO at start of the
+                              Spark Context.
+
+e.g.
+    spark-shell -m spark://localhost:7077 -c 4 -dm 512m -em 2g
+
+EOF
+}
+
+function out_error(){
+    echo -e "${txtund}${bldred}ERROR${txtrst}: $1"
+    usage
+    exit 1
+}
+
+function log_info(){
+    [ $info_log -eq 1 ] && echo -e "${bldyel}INFO${txtrst}: $1"
+}
+
+function log_warn(){
+    echo -e "${txtund}${bldyel}WARN${txtrst}: $1"
+}
 
-for o in "$@"; do
-  if [ "$1" = "-c" -o "$1" = "--cores" ]; then
-    shift
+# PATTERNS used to validate more than one optional arg.
+ARG_FLAG_PATTERN="^-"
+MEM_PATTERN="^[0-9]+[m|g|M|G]$"
+NUM_PATTERN="^[0-9]+$"
+PORT_PATTERN="^[0-9]+$"
+
+# Setters for optional args.
+function set_cores(){
+    CORE_PATTERN="^[0-9]+$"
     if [[ "$1" =~ $CORE_PATTERN ]]; then
-      SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Dspark.cores.max=$1"
-      shift
+        SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Dspark.cores.max=$1"
     else
-      echo "ERROR: wrong format for -c/--cores"
-      exit 1
+        out_error "wrong format for $2"
     fi
-  fi
-  if [ "$1" = "-em" -o "$1" = "--execmem" ]; then
-    shift
+}
+
+function set_em(){
    if [[ $1 =~ $MEM_PATTERN ]]; then
      SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Dspark.executor.memory=$1"
-      shift
    else
-      echo "ERROR: wrong format for --execmem/-em"
-      exit 1
+        out_error "wrong format for $2"
    fi
-  fi
-  if [ "$1" = "-dm" -o "$1" = "--drivermem" ]; then
-    shift
+}
+
+function set_dm(){
    if [[ $1 =~ $MEM_PATTERN ]]; then
      export SPARK_DRIVER_MEMORY=$1
-      shift
    else
-      echo "ERROR: wrong format for --drivermem/-dm"
-      exit 1
+        out_error "wrong format for $2"
    fi
-  fi
-done
+}
+
+function set_spark_log_conf(){
+    SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Dspark.logConf=$1"
+}
 
-# Set MASTER from spark-env if possible
-DEFAULT_SPARK_MASTER_PORT=7077
-if [ -z "$MASTER" ]; then
-  . $FWDIR/bin/load-spark-env.sh
-  if [ "x" != "x$SPARK_MASTER_IP" ]; then
-    if [ "y" != "y$SPARK_MASTER_PORT" ]; then
-      SPARK_MASTER_PORT="${SPARK_MASTER_PORT}"
+function set_spark_master(){
+    if ! [[ "$1" =~ $ARG_FLAG_PATTERN ]]; then
+        MASTER="$1"
    else
-      SPARK_MASTER_PORT=$DEFAULT_SPARK_MASTER_PORT
+        out_error "wrong format for $2"
+    fi
+}
+
+function resolve_spark_master(){
+    # Set MASTER from spark-env if possible
+    DEFAULT_SPARK_MASTER_PORT=7077
+    if [ -z "$MASTER" ]; then
+        . $FWDIR/bin/load-spark-env.sh
+        if [ -n "$SPARK_MASTER_IP" ]; then
+            SPARK_MASTER_PORT="${SPARK_MASTER_PORT:-"$DEFAULT_SPARK_MASTER_PORT"}"
+            export MASTER="spark://${SPARK_MASTER_IP}:${SPARK_MASTER_PORT}"
+        fi
+    fi
+
+    if [ -z "$MASTER" ]; then
+        MASTER="$DEFAULT_MASTER"
    fi
-    export MASTER="spark://${SPARK_MASTER_IP}:${SPARK_MASTER_PORT}"
-  fi
-fi
+
+}
+
+function main(){
+    log_info "Base Directory set to $FWDIR"
+
+    resolve_spark_master
+    log_info "Spark Master is $MASTER"
+
+    log_info "Spark REPL options $SPARK_REPL_OPTS"
+    if $cygwin; then
+        # Workaround for issue involving JLine and Cygwin
+        # (see http://sourceforge.net/p/jline/bugs/40/).
+        # If you're using the Mintty terminal emulator in Cygwin, may need to set the
+        # "Backspace sends ^H" setting in "Keys" section of the Mintty options
+        # (see https://github.com/sbt/sbt/issues/562).
+        stty -icanon min 1 -echo > /dev/null 2>&1
+        export SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Djline.terminal=unix"
+        $FWDIR/bin/spark-class org.apache.spark.repl.Main "$@"
+        stty icanon echo > /dev/null 2>&1
+    else
+        export SPARK_REPL_OPTS
+        $FWDIR/bin/spark-class org.apache.spark.repl.Main "$@"
+    fi
+}
+
+for option in "$@"
+do
+    case $option in
+        -h | --help )
+            usage
+            exit 1
+            ;;
+        -c | --cores)
+            shift
+            _1=$1
+            shift
+            set_cores $_1 "-c/--cores"
+            ;;
+        -em | --executor-memory)
+            shift
+            _1=$1
+            shift
+            set_em $_1 "-em/--executor-memory"
+            ;;
+        -dm | --driver-memory)
+            shift
+            _1=$1
+            shift
+            set_dm $_1 "-dm/--driver-memory"
+            ;;
+        -m | --master)
+            shift
+            _1=$1
+            shift
+            set_spark_master $_1 "-m/--master"
+            ;;
+        --log-conf)
+            shift
+            set_spark_log_conf "true"
+            info_log=1
+            ;;
+        ?)
+            ;;
+    esac
+done
 
 # Copy restore-TTY-on-exit functions from Scala script so spark-shell exits properly even in
 # binary distribution of Spark where Scala is not installed
@@ -120,22 +242,10 @@ if [[ ! $? ]]; then
   saved_stty=""
 fi
 
-if $cygwin; then
-  # Workaround for issue involving JLine and Cygwin
-  # (see http://sourceforge.net/p/jline/bugs/40/).
-  # If you're using the Mintty terminal emulator in Cygwin, may need to set the
-  # "Backspace sends ^H" setting in "Keys" section of the Mintty options
-  # (see https://github.com/sbt/sbt/issues/562).
-  stty -icanon min 1 -echo > /dev/null 2>&1
-  export SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Djline.terminal=unix"
-  $FWDIR/bin/spark-class org.apache.spark.repl.Main "$@"
-  stty icanon echo > /dev/null 2>&1
-else
-  export SPARK_REPL_OPTS
-  $FWDIR/bin/spark-class org.apache.spark.repl.Main "$@"
-fi
+main
 
 # record the exit status lest it be overwritten:
 # then reenable echo and propagate the code.
 exit_status=$?
 onExit
+
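Taking the usage text's own example invocation, here is a sketch of what the new option parsing should produce — the values are traced by hand through the setters above, not captured from an actual run:

# Sketch: the example from usage(), traced through the new setters.
./bin/spark-shell -m spark://localhost:7077 -c 4 -dm 512m -em 2g
# Expected effect, per set_spark_master / set_cores / set_dm / set_em:
#   MASTER=spark://localhost:7077
#   SPARK_REPL_OPTS="... -Dspark.cores.max=4 -Dspark.executor.memory=2g"
#   SPARK_DRIVER_MEMORY=512m (exported for spark-class to pick up)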

bin/spark-submit
Lines changed: 38 additions & 0 deletions

@@ -0,0 +1,38 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+export SPARK_HOME="$(cd `dirname $0`/..; pwd)"
+ORIG_ARGS=$@
+
+while (($#)); do
+  if [ $1 = "--deploy-mode" ]; then
+    DEPLOY_MODE=$2
+  elif [ $1 = "--driver-memory" ]; then
+    DRIVER_MEMORY=$2
+  fi
+
+  shift
+done
+
+if [ ! -z $DRIVER_MEMORY ] && [ ! -z $DEPLOY_MODE ] && [ $DEPLOY_MODE = "client" ]; then
+  export SPARK_MEM=$DRIVER_MEMORY
+fi
+
+$SPARK_HOME/bin/spark-class org.apache.spark.deploy.SparkSubmit $ORIG_ARGS

core/src/main/scala/org/apache/spark/Dependency.scala
Lines changed: 3 additions & 2 deletions

@@ -44,8 +44,9 @@ abstract class NarrowDependency[T](rdd: RDD[T]) extends Dependency(rdd) {
  * Represents a dependency on the output of a shuffle stage.
  * @param rdd the parent RDD
  * @param partitioner partitioner used to partition the shuffle output
- * @param serializer [[Serializer]] to use. If set to null, the default serializer, as specified
- *                   by `spark.serializer` config option, will be used.
+ * @param serializer [[org.apache.spark.serializer.Serializer Serializer]] to use. If set to null,
+ *                   the default serializer, as specified by `spark.serializer` config option, will
+ *                   be used.
  */
 class ShuffleDependency[K, V](
     @transient rdd: RDD[_ <: Product2[K, V]],
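Note on the Scaladoc change above: in a `[[target label]]` link, the first element is the link target and the optional second is the rendered text. Swapping the bare `[[Serializer]]` for `[[org.apache.spark.serializer.Serializer Serializer]]` lets the link resolve regardless of the enclosing package while still displaying as plain "Serializer".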
