
Commit a306020

Merge branch 'master' of git://git.apache.org/spark into emacs-metafiles-ignore

2 parents: 6a0a5eb + 6772afe

13 files changed: +150 -72 lines

.rat-excludes

Lines changed: 1 addition & 0 deletions
@@ -59,3 +59,4 @@ dist/*
 .*iws
 logs
 .*scalastyle-output.xml
+.*dependency-reduced-pom.xml

CONTRIBUTING.md

Lines changed: 1 addition & 1 deletion
@@ -8,5 +8,5 @@ submitting any copyrighted material via pull request, email, or other means
 you agree to license the material under the project's open source license and
 warrant that you have the legal authority to do so.
 
-Please see [Contributing to Spark wiki page](https://cwiki.apache.org/SPARK/Contributing+to+Spark)
+Please see the [Contributing to Spark wiki page](https://cwiki.apache.org/SPARK/Contributing+to+Spark)
 for more information.

core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala

Lines changed: 3 additions & 1 deletion
@@ -67,6 +67,7 @@ private[spark] class HistoryPage(parent: HistoryServer) extends WebUIPage("") {
   }
 
   private val appHeader = Seq(
+    "App ID",
     "App Name",
     "Started",
     "Completed",
@@ -81,7 +82,8 @@ private[spark] class HistoryPage(parent: HistoryServer) extends WebUIPage("") {
     val duration = UIUtils.formatDuration(info.endTime - info.startTime)
     val lastUpdated = UIUtils.formatDate(info.lastUpdated)
     <tr>
-      <td><a href={uiAddress}>{info.name}</a></td>
+      <td><a href={uiAddress}>{info.id}</a></td>
+      <td>{info.name}</td>
       <td>{startTime}</td>
       <td>{endTime}</td>
       <td>{duration}</td>

core/src/main/scala/org/apache/spark/deploy/master/Master.scala

Lines changed: 6 additions & 5 deletions
@@ -489,23 +489,24 @@ private[spark] class Master(
     // First schedule drivers, they take strict precedence over applications
     // Randomization helps balance drivers
     val shuffledAliveWorkers = Random.shuffle(workers.toSeq.filter(_.state == WorkerState.ALIVE))
-    val aliveWorkerNum = shuffledAliveWorkers.size
+    val numWorkersAlive = shuffledAliveWorkers.size
     var curPos = 0
+
     for (driver <- waitingDrivers.toList) { // iterate over a copy of waitingDrivers
       // We assign workers to each waiting driver in a round-robin fashion. For each driver, we
       // start from the last worker that was assigned a driver, and continue onwards until we have
       // explored all alive workers.
-      curPos = (curPos + 1) % aliveWorkerNum
-      val startPos = curPos
       var launched = false
-      while (curPos != startPos && !launched) {
+      var numWorkersVisited = 0
+      while (numWorkersVisited < numWorkersAlive && !launched) {
         val worker = shuffledAliveWorkers(curPos)
+        numWorkersVisited += 1
         if (worker.memoryFree >= driver.desc.mem && worker.coresFree >= driver.desc.cores) {
           launchDriver(worker, driver)
           waitingDrivers -= driver
           launched = true
         }
-        curPos = (curPos + 1) % aliveWorkerNum
+        curPos = (curPos + 1) % numWorkersAlive
       }
     }
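The fix above is visible from the diff alone: the old loop captured startPos right after the first increment, so the guard curPos != startPos was false on the very first check and the loop body never ran; counting visited workers instead guarantees each waiting driver considers every alive worker exactly once, while curPos carries over between drivers to keep placements spread out. A minimal, self-contained sketch of that visit-count-bounded round-robin pattern, using made-up Worker and Driver case classes rather than Spark's internal types:

import scala.util.Random

// Hypothetical stand-ins for Spark's WorkerInfo / DriverDescription, for illustration only.
case class Worker(id: String, var memoryFree: Int, var coresFree: Int)
case class Driver(id: String, mem: Int, cores: Int)

object RoundRobinSketch {
  // Returns driver id -> worker id for every driver that could be placed.
  def assign(aliveWorkers: IndexedSeq[Worker], waitingDrivers: Seq[Driver]): Map[String, String] = {
    val shuffled = Random.shuffle(aliveWorkers) // randomization helps balance drivers
    val numWorkersAlive = shuffled.size
    var curPos = 0
    var placements = Map.empty[String, String]

    for (driver <- waitingDrivers) {
      var launched = false
      var numWorkersVisited = 0
      // Visit each alive worker at most once per driver; stop early once placed.
      while (numWorkersVisited < numWorkersAlive && !launched) {
        val worker = shuffled(curPos)
        numWorkersVisited += 1
        if (worker.memoryFree >= driver.mem && worker.coresFree >= driver.cores) {
          worker.memoryFree -= driver.mem
          worker.coresFree -= driver.cores
          placements += (driver.id -> worker.id)
          launched = true
        }
        curPos = (curPos + 1) % numWorkersAlive
      }
    }
    placements
  }

  def main(args: Array[String]): Unit = {
    val workers = IndexedSeq(Worker("w1", 8192, 8), Worker("w2", 4096, 4))
    val drivers = Seq(Driver("d1", 1024, 1), Driver("d2", 1024, 1), Driver("d3", 1024, 1))
    println(assign(workers, drivers)) // each driver lands on some worker with enough resources
  }
}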

core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala

Lines changed: 1 addition & 1 deletion
@@ -34,7 +34,7 @@ private[spark] class SparkDeploySchedulerBackend(
   var client: AppClient = null
   var stopping = false
   var shutdownCallback : (SparkDeploySchedulerBackend) => Unit = _
-  var appId: String = _
+  @volatile var appId: String = _
 
   val registrationLock = new Object()
   var registrationDone = false
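The only change here is the @volatile annotation, which compiles to a JVM volatile field: a write made on one thread becomes immediately visible to reads on other threads, with no extra locking. A small sketch of that visibility pattern, using hypothetical names rather than the scheduler backend's real callback plumbing:

// Illustrative only; RegistrationState and connected() are made-up names.
private class RegistrationState {
  // Written by a callback/event thread, read by the main thread.
  @volatile var appId: String = _

  def connected(id: String): Unit = { appId = id }
}

object VolatileSketch extends App {
  val state = new RegistrationState
  new Thread(() => state.connected("app-20140831-0001")).start() // example id

  // Spin until the volatile write becomes visible; without @volatile this loop
  // could, in principle, never observe the update made on the other thread.
  while (state.appId == null) Thread.sleep(1)
  println(s"registered appId = ${state.appId}")
}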

core/src/main/scala/org/apache/spark/util/Utils.scala

Lines changed: 3 additions & 3 deletions
@@ -1382,15 +1382,15 @@ private[spark] object Utils extends Logging {
   }
 
   /**
-   * Default number of retries in binding to a port.
+   * Default maximum number of retries when binding to a port before giving up.
    */
   val portMaxRetries: Int = {
     if (sys.props.contains("spark.testing")) {
       // Set a higher number of retries for tests...
-      sys.props.get("spark.ports.maxRetries").map(_.toInt).getOrElse(100)
+      sys.props.get("spark.port.maxRetries").map(_.toInt).getOrElse(100)
     } else {
       Option(SparkEnv.get)
-        .flatMap(_.conf.getOption("spark.ports.maxRetries"))
+        .flatMap(_.conf.getOption("spark.port.maxRetries"))
         .map(_.toInt)
         .getOrElse(16)
     }
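This hunk renames the key being read to spark.port.maxRetries, matching the name documented in docs/configuration.md (changed later in this same commit); the old code looked up spark.ports.maxRetries, which the documented setting would never reach. A short sketch of how a user would supply the documented key, assuming a standard SparkConf-based app (the app name is illustrative):

import org.apache.spark.SparkConf

object PortRetriesExample {
  def main(args: Array[String]): Unit = {
    // Allow up to 32 bind attempts instead of the documented default of 16.
    val conf = new SparkConf()
      .setAppName("port-retries-example") // illustrative name
      .set("spark.port.maxRetries", "32")

    // The same key can also be passed as a JVM system property,
    // e.g. -Dspark.port.maxRetries=32, before the SparkConf is created.
    println(conf.get("spark.port.maxRetries"))
  }
}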

core/src/test/scala/org/apache/spark/deploy/JsonProtocolSuite.scala

Lines changed: 2 additions & 0 deletions
@@ -115,11 +115,13 @@ class JsonProtocolSuite extends FunSuite {
     workerInfo.lastHeartbeat = JsonConstants.currTimeInMillis
     workerInfo
   }
+
   def createExecutorRunner(): ExecutorRunner = {
     new ExecutorRunner("appId", 123, createAppDesc(), 4, 1234, null, "workerId", "host",
       new File("sparkHome"), new File("workDir"), "akka://worker",
       new SparkConf, ExecutorState.RUNNING)
   }
+
   def createDriverRunner(): DriverRunner = {
     new DriverRunner(new SparkConf(), "driverId", new File("workDir"), new File("sparkHome"),
       createDriverDesc(), null, "akka://worker")

dev/run-tests

Lines changed: 109 additions & 50 deletions
@@ -21,44 +21,73 @@
 FWDIR="$(cd "`dirname $0`"/..; pwd)"
 cd "$FWDIR"
 
-if [ -n "$AMPLAB_JENKINS_BUILD_PROFILE" ]; then
-  if [ "$AMPLAB_JENKINS_BUILD_PROFILE" = "hadoop1.0" ]; then
-    export SBT_MAVEN_PROFILES_ARGS="-Dhadoop.version=1.0.4"
-  elif [ "$AMPLAB_JENKINS_BUILD_PROFILE" = "hadoop2.0" ]; then
-    export SBT_MAVEN_PROFILES_ARGS="-Dhadoop.version=2.0.0-mr1-cdh4.1.1"
-  elif [ "$AMPLAB_JENKINS_BUILD_PROFILE" = "hadoop2.2" ]; then
-    export SBT_MAVEN_PROFILES_ARGS="-Pyarn -Dhadoop.version=2.2.0"
-  elif [ "$AMPLAB_JENKINS_BUILD_PROFILE" = "hadoop2.3" ]; then
-    export SBT_MAVEN_PROFILES_ARGS="-Pyarn -Phadoop-2.3 -Dhadoop.version=2.3.0"
+# Remove work directory
+rm -rf ./work
+
+# Build against the right verison of Hadoop.
+{
+  if [ -n "$AMPLAB_JENKINS_BUILD_PROFILE" ]; then
+    if [ "$AMPLAB_JENKINS_BUILD_PROFILE" = "hadoop1.0" ]; then
+      export SBT_MAVEN_PROFILES_ARGS="-Dhadoop.version=1.0.4"
+    elif [ "$AMPLAB_JENKINS_BUILD_PROFILE" = "hadoop2.0" ]; then
+      export SBT_MAVEN_PROFILES_ARGS="-Dhadoop.version=2.0.0-mr1-cdh4.1.1"
+    elif [ "$AMPLAB_JENKINS_BUILD_PROFILE" = "hadoop2.2" ]; then
+      export SBT_MAVEN_PROFILES_ARGS="-Pyarn -Dhadoop.version=2.2.0"
+    elif [ "$AMPLAB_JENKINS_BUILD_PROFILE" = "hadoop2.3" ]; then
+      export SBT_MAVEN_PROFILES_ARGS="-Pyarn -Phadoop-2.3 -Dhadoop.version=2.3.0"
+    fi
   fi
-fi
 
-if [ -z "$SBT_MAVEN_PROFILES_ARGS" ]; then
-  export SBT_MAVEN_PROFILES_ARGS="-Pyarn -Phadoop-2.3 -Dhadoop.version=2.3.0"
-fi
+  if [ -z "$SBT_MAVEN_PROFILES_ARGS" ]; then
+    export SBT_MAVEN_PROFILES_ARGS="-Pyarn -Phadoop-2.3 -Dhadoop.version=2.3.0"
+  fi
+}
 
 export SBT_MAVEN_PROFILES_ARGS="$SBT_MAVEN_PROFILES_ARGS -Pkinesis-asl"
 
-echo "SBT_MAVEN_PROFILES_ARGS=\"$SBT_MAVEN_PROFILES_ARGS\""
-
-# Remove work directory
-rm -rf ./work
-
-if test -x "$JAVA_HOME/bin/java"; then
-  declare java_cmd="$JAVA_HOME/bin/java"
-else
-  declare java_cmd=java
-fi
-JAVA_VERSION=$($java_cmd -version 2>&1 | sed 's/java version "\(.*\)\.\(.*\)\..*"/\1\2/; 1q')
-[ "$JAVA_VERSION" -ge 18 ] && echo "" || echo "[Warn] Java 8 tests will not run because JDK version is < 1.8."
+# Determine Java path and version.
+{
+  if test -x "$JAVA_HOME/bin/java"; then
+    declare java_cmd="$JAVA_HOME/bin/java"
+  else
+    declare java_cmd=java
+  fi
+
+  # We can't use sed -r -e due to OS X / BSD compatibility; hence, all the parentheses.
+  JAVA_VERSION=$(
+    $java_cmd -version 2>&1 \
+    | grep -e "^java version" --max-count=1 \
+    | sed "s/java version \"\(.*\)\.\(.*\)\.\(.*\)\"/\1\2/"
+  )
+
+  if [ "$JAVA_VERSION" -lt 18 ]; then
+    echo "[warn] Java 8 tests will not run because JDK version is < 1.8."
+  fi
+}
 
-# Partial solution for SPARK-1455. Only run Hive tests if there are sql changes.
+# Only run Hive tests if there are sql changes.
+# Partial solution for SPARK-1455.
 if [ -n "$AMPLAB_JENKINS" ]; then
   git fetch origin master:master
-  diffs=`git diff --name-only master | grep "^\(sql/\)\|\(bin/spark-sql\)\|\(sbin/start-thriftserver.sh\)"`
-  if [ -n "$diffs" ]; then
-    echo "Detected changes in SQL. Will run Hive test suite."
+
+  sql_diffs=$(
+    git diff --name-only master \
+    | grep -e "^sql/" -e "^bin/spark-sql" -e "^sbin/start-thriftserver.sh"
+  )
+
+  non_sql_diffs=$(
+    git diff --name-only master \
+    | grep -v -e "^sql/" -e "^bin/spark-sql" -e "^sbin/start-thriftserver.sh"
+  )
+
+  if [ -n "$sql_diffs" ]; then
+    echo "[info] Detected changes in SQL. Will run Hive test suite."
     _RUN_SQL_TESTS=true
+
+    if [ -z "$non_sql_diffs" ]; then
+      echo "[info] Detected no changes except in SQL. Will only run SQL tests."
+      _SQL_TESTS_ONLY=true
+    fi
   fi
 fi
 
@@ -70,42 +99,72 @@ echo ""
 echo "========================================================================="
 echo "Running Apache RAT checks"
 echo "========================================================================="
-dev/check-license
+./dev/check-license
 
 echo ""
 echo "========================================================================="
 echo "Running Scala style checks"
 echo "========================================================================="
-dev/lint-scala
+./dev/lint-scala
 
 echo ""
 echo "========================================================================="
 echo "Running Python style checks"
 echo "========================================================================="
-dev/lint-python
+./dev/lint-python
+
+echo ""
+echo "========================================================================="
+echo "Building Spark"
+echo "========================================================================="
+
+{
+  # We always build with Hive because the PySpark Spark SQL tests need it.
+  BUILD_MVN_PROFILE_ARGS="$SBT_MAVEN_PROFILES_ARGS -Phive"
+
+  echo "[info] Building Spark with these arguments: $BUILD_MVN_PROFILE_ARGS"
+
+  # NOTE: echo "q" is needed because sbt on encountering a build file with failure
+  #+ (either resolution or compilation) prompts the user for input either q, r, etc
+  #+ to quit or retry. This echo is there to make it not block.
  # QUESTION: Why doesn't 'yes "q"' work?
+  # QUESTION: Why doesn't 'grep -v -e "^\[info\] Resolving"' work?
+  echo -e "q\n" \
+    | sbt/sbt $BUILD_MVN_PROFILE_ARGS clean package assembly/assembly \
+    | grep -v -e "info.*Resolving" -e "warn.*Merging" -e "info.*Including"
+}
 
 echo ""
 echo "========================================================================="
 echo "Running Spark unit tests"
 echo "========================================================================="
 
-# Build Spark; we always build with Hive because the PySpark Spark SQL tests need it.
-# echo "q" is needed because sbt on encountering a build file with failure
-# (either resolution or compilation) prompts the user for input either q, r,
-# etc to quit or retry. This echo is there to make it not block.
-BUILD_MVN_PROFILE_ARGS="$SBT_MAVEN_PROFILES_ARGS -Phive "
-echo -e "q\n" | sbt/sbt $BUILD_MVN_PROFILE_ARGS clean package assembly/assembly | \
-  grep -v -e "info.*Resolving" -e "warn.*Merging" -e "info.*Including"
-
-# If the Spark SQL tests are enabled, run the tests with the Hive profiles enabled:
-if [ -n "$_RUN_SQL_TESTS" ]; then
-  SBT_MAVEN_PROFILES_ARGS="$SBT_MAVEN_PROFILES_ARGS -Phive"
-fi
-# echo "q" is needed because sbt on encountering a build file with failure
-# (either resolution or compilation) prompts the user for input either q, r,
-# etc to quit or retry. This echo is there to make it not block.
-echo -e "q\n" | sbt/sbt $SBT_MAVEN_PROFILES_ARGS test | \
-  grep -v -e "info.*Resolving" -e "warn.*Merging" -e "info.*Including"
+{
+  # If the Spark SQL tests are enabled, run the tests with the Hive profiles enabled.
+  # This must be a single argument, as it is.
+  if [ -n "$_RUN_SQL_TESTS" ]; then
+    SBT_MAVEN_PROFILES_ARGS="$SBT_MAVEN_PROFILES_ARGS -Phive"
+  fi
+
+  if [ -n "$_SQL_TESTS_ONLY" ]; then
+    # This must be an array of individual arguments. Otherwise, having one long string
+    #+ will be interpreted as a single test, which doesn't work.
+    SBT_MAVEN_TEST_ARGS=("catalyst/test" "sql/test" "hive/test" "hive-thriftserver/test")
+  else
+    SBT_MAVEN_TEST_ARGS=("test")
+  fi
+
+  echo "[info] Running Spark tests with these arguments: $SBT_MAVEN_PROFILES_ARGS ${SBT_MAVEN_TEST_ARGS[@]}"
+
+  # NOTE: echo "q" is needed because sbt on encountering a build file with failure
+  #+ (either resolution or compilation) prompts the user for input either q, r, etc
+  #+ to quit or retry. This echo is there to make it not block.
+  # QUESTION: Why doesn't 'yes "q"' work?
+  # QUESTION: Why doesn't 'grep -v -e "^\[info\] Resolving"' work?
+  echo -e "q\n" \
    | sbt/sbt "$SBT_MAVEN_PROFILES_ARGS" "${SBT_MAVEN_TEST_ARGS[@]}" \
    | grep -v -e "info.*Resolving" -e "warn.*Merging" -e "info.*Including"
+}
 
 echo ""
 echo "========================================================================="
@@ -117,4 +176,4 @@ echo ""
 echo "========================================================================="
 echo "Detecting binary incompatibilites with MiMa"
 echo "========================================================================="
-dev/mima
+./dev/mima

docs/configuration.md

Lines changed: 1 addition & 1 deletion
@@ -657,7 +657,7 @@ Apart from these, the following properties are also available, and may be useful
   <td><code>spark.port.maxRetries</code></td>
   <td>16</td>
   <td>
-    Maximum number of retries when binding to a port before giving up.
+    Default maximum number of retries when binding to a port before giving up.
   </td>
 </tr>
 <tr>

docs/spark-standalone.md

Lines changed: 2 additions & 2 deletions
@@ -307,7 +307,7 @@ tight firewall settings. For a complete list of ports to configure, see the
 
 By default, standalone scheduling clusters are resilient to Worker failures (insofar as Spark itself is resilient to losing work by moving it to other workers). However, the scheduler uses a Master to make scheduling decisions, and this (by default) creates a single point of failure: if the Master crashes, no new applications can be created. In order to circumvent this, we have two high availability schemes, detailed below.
 
-# Standby Masters with ZooKeeper
+## Standby Masters with ZooKeeper
 
 **Overview**
 
@@ -347,7 +347,7 @@ There's an important distinction to be made between "registering with a Master"
 
 Due to this property, new Masters can be created at any time, and the only thing you need to worry about is that _new_ applications and Workers can find it to register with in case it becomes the leader. Once registered, you're taken care of.
 
-# Single-Node Recovery with Local File System
+## Single-Node Recovery with Local File System
 
 **Overview**