SPARK-1432: Make sure that all metadata fields are properly cleaned

Davis Shepherd · pwendell · commit a3c51c6ea232 · 2014-04-07T10:02:00.000-07:00
While working on spark-1337 with @pwendell, we noticed that not all of the metadata maps in JobProgessListener were being properly cleaned. This could lead to a (hypothetical) memory leak issue should a job run long enough. This patch aims to address the issue. Author: Davis Shepherd <davis@conviva.com> Closes #338 from dgshep/master and squashes the following commits: a77b65c [Davis Shepherd] In the contex of SPARK-1337: Make sure that all metadata fields are properly cleaned
diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala b/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala
@@ -83,7 +83,6 @@ private[ui] class JobProgressListener(conf: SparkConf) extends SparkListener {
     if (stages.size > retainedStages) {
       val toRemove = math.max(retainedStages / 10, 1)
       stages.take(toRemove).foreach { s =>
-        stageIdToTaskData.remove(s.stageId)
         stageIdToTime.remove(s.stageId)
         stageIdToShuffleRead.remove(s.stageId)
         stageIdToShuffleWrite.remove(s.stageId)
@@ -92,8 +91,10 @@ private[ui] class JobProgressListener(conf: SparkConf) extends SparkListener {
         stageIdToTasksActive.remove(s.stageId)
         stageIdToTasksComplete.remove(s.stageId)
         stageIdToTasksFailed.remove(s.stageId)
+        stageIdToTaskData.remove(s.stageId)
+        stageIdToExecutorSummaries.remove(s.stageId)
         stageIdToPool.remove(s.stageId)
-        if (stageIdToDescription.contains(s.stageId)) {stageIdToDescription.remove(s.stageId)}
+        stageIdToDescription.remove(s.stageId)
       }
       stages.trimStart(toRemove)
     }