Skip to content

Commit d061b85

Browse files
gpoulinrxin
authored andcommitted
[SPARK-12678][CORE] MapPartitionsRDD clearDependencies
MapPartitionsRDD was keeping a reference to `prev` after a call to `clearDependencies` which could lead to memory leak. Author: Guillaume Poulin <[email protected]> Closes #10623 from gpoulin/map_partition_deps. (cherry picked from commit b673852) Signed-off-by: Reynold Xin <[email protected]>
1 parent 94af69c commit d061b85

File tree

1 file changed

+6
-1
lines changed

1 file changed

+6
-1
lines changed

core/src/main/scala/org/apache/spark/rdd/MapPartitionsRDD.scala

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ import org.apache.spark.{Partition, TaskContext}
2525
* An RDD that applies the provided function to every partition of the parent RDD.
2626
*/
2727
private[spark] class MapPartitionsRDD[U: ClassTag, T: ClassTag](
28-
prev: RDD[T],
28+
var prev: RDD[T],
2929
f: (TaskContext, Int, Iterator[T]) => Iterator[U], // (TaskContext, partition index, iterator)
3030
preservesPartitioning: Boolean = false)
3131
extends RDD[U](prev) {
@@ -36,4 +36,9 @@ private[spark] class MapPartitionsRDD[U: ClassTag, T: ClassTag](
3636

3737
override def compute(split: Partition, context: TaskContext): Iterator[U] =
3838
f(context, split.index, firstParent[T].iterator(split, context))
39+
40+
override def clearDependencies() {
41+
super.clearDependencies()
42+
prev = null
43+
}
3944
}

0 commit comments

Comments
 (0)