Skip to content

Commit 1009ef1

Browse files
committed
[SPARK-7384][Core][Tests] Fix flaky tests for distributed mode in BroadcastSuite
Fixed the following failure: https://amplab.cs.berkeley.edu/jenkins/job/Spark-1.3-Maven-pre-YARN/hadoop.version=1.0.4,label=centos/452/testReport/junit/org.apache.spark.broadcast/BroadcastSuite/Unpersisting_HttpBroadcast_on_executors_and_driver_in_distributed_mode/ The tests should wait until all slaves are up. Otherwise, only some of the `BlockManager`s may have registered when the assertions run, causing the tests to fail.
1 parent fec7b29 commit 1009ef1

File tree

2 files changed

+17
-4
lines changed

2 files changed

+17
-4
lines changed

core/src/main/scala/org/apache/spark/SparkContext.scala

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -407,15 +407,15 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
407407

408408
if (master == "yarn-client") System.setProperty("SPARK_YARN_MODE", "true")
409409

410+
_jobProgressListener = new JobProgressListener(_conf)
411+
listenerBus.addListener(jobProgressListener)
412+
410413
// Create the Spark execution environment (cache, map output tracker, etc)
411414
_env = createSparkEnv(_conf, isLocal, listenerBus)
412415
SparkEnv.set(_env)
413416

414417
_metadataCleaner = new MetadataCleaner(MetadataCleanerType.SPARK_CONTEXT, this.cleanup, _conf)
415418

416-
_jobProgressListener = new JobProgressListener(_conf)
417-
listenerBus.addListener(jobProgressListener)
418-
419419
_statusTracker = new SparkStatusTracker(this)
420420

421421
_progressBar =

core/src/test/scala/org/apache/spark/broadcast/BroadcastSuite.scala

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,12 @@
1717

1818
package org.apache.spark.broadcast
1919

20+
import scala.concurrent.duration._
21+
import scala.language.postfixOps
2022
import scala.util.Random
2123

2224
import org.scalatest.{Assertions, FunSuite}
25+
import org.scalatest.concurrent.Eventually._
2326

2427
import org.apache.spark.{LocalSparkContext, SparkConf, SparkContext, SparkException, SparkEnv}
2528
import org.apache.spark.io.SnappyCompressionCodec
@@ -307,7 +310,17 @@ class BroadcastSuite extends FunSuite with LocalSparkContext {
307310
removeFromDriver: Boolean) {
308311

309312
sc = if (distributed) {
310-
new SparkContext("local-cluster[%d, 1, 512]".format(numSlaves), "test", broadcastConf)
313+
val _sc =
314+
new SparkContext("local-cluster[%d, 1, 512]".format(numSlaves), "test", broadcastConf)
315+
// Wait until all slaves are up
316+
eventually(timeout(10 seconds), interval(10 milliseconds)) {
317+
_sc.jobProgressListener.synchronized {
318+
val numBlockManagers = _sc.jobProgressListener.blockManagerIds.size
319+
assert(numBlockManagers == numSlaves + 1,
320+
s"Expect ${numSlaves + 1} block managers, but was ${numBlockManagers}")
321+
}
322+
}
323+
_sc
311324
} else {
312325
new SparkContext("local", "test", broadcastConf)
313326
}

0 commit comments

Comments
 (0)