Skip to content

Commit 77be8b9

Browse files
JerryLead authored and ankurdave committed
[SPARK-4672][Core]Checkpoint() should clear f to shorten the serialization chain
The related JIRA is https://issues.apache.org/jira/browse/SPARK-4672 The f closure of `PartitionsRDD(ZippedPartitionsRDD2)` contains a `$outer` that references EdgeRDD/VertexRDD, which causes the task's serialization chain to become very long in iterative GraphX applications. As a result, a StackOverflow error will occur. If we set "f = null" in `clearDependencies()`, checkpoint() can cut off the long serialization chain. More details and explanation can be found in the JIRA. Author: JerryLead <[email protected]> Author: Lijie Xu <[email protected]> Closes apache#3545 from JerryLead/my_core and squashes the following commits: f7faea5 [JerryLead] checkpoint() should clear the f to avoid StackOverflow error c0169da [JerryLead] Merge branch 'master' of https://github.com/apache/spark 52799e3 [Lijie Xu] Merge pull request #1 from apache/master
1 parent 17c162f commit 77be8b9

File tree

1 file changed

+6
-3
lines changed

1 file changed

+6
-3
lines changed

core/src/main/scala/org/apache/spark/rdd/ZippedPartitionsRDD.scala

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ private[spark] abstract class ZippedPartitionsBaseRDD[V: ClassTag](
7777

7878
private[spark] class ZippedPartitionsRDD2[A: ClassTag, B: ClassTag, V: ClassTag](
7979
sc: SparkContext,
80-
f: (Iterator[A], Iterator[B]) => Iterator[V],
80+
var f: (Iterator[A], Iterator[B]) => Iterator[V],
8181
var rdd1: RDD[A],
8282
var rdd2: RDD[B],
8383
preservesPartitioning: Boolean = false)
@@ -92,13 +92,14 @@ private[spark] class ZippedPartitionsRDD2[A: ClassTag, B: ClassTag, V: ClassTag]
9292
super.clearDependencies()
9393
rdd1 = null
9494
rdd2 = null
95+
f = null
9596
}
9697
}
9798

9899
private[spark] class ZippedPartitionsRDD3
99100
[A: ClassTag, B: ClassTag, C: ClassTag, V: ClassTag](
100101
sc: SparkContext,
101-
f: (Iterator[A], Iterator[B], Iterator[C]) => Iterator[V],
102+
var f: (Iterator[A], Iterator[B], Iterator[C]) => Iterator[V],
102103
var rdd1: RDD[A],
103104
var rdd2: RDD[B],
104105
var rdd3: RDD[C],
@@ -117,13 +118,14 @@ private[spark] class ZippedPartitionsRDD3
117118
rdd1 = null
118119
rdd2 = null
119120
rdd3 = null
121+
f = null
120122
}
121123
}
122124

123125
private[spark] class ZippedPartitionsRDD4
124126
[A: ClassTag, B: ClassTag, C: ClassTag, D:ClassTag, V: ClassTag](
125127
sc: SparkContext,
126-
f: (Iterator[A], Iterator[B], Iterator[C], Iterator[D]) => Iterator[V],
128+
var f: (Iterator[A], Iterator[B], Iterator[C], Iterator[D]) => Iterator[V],
127129
var rdd1: RDD[A],
128130
var rdd2: RDD[B],
129131
var rdd3: RDD[C],
@@ -145,5 +147,6 @@ private[spark] class ZippedPartitionsRDD4
145147
rdd2 = null
146148
rdd3 = null
147149
rdd4 = null
150+
f = null
148151
}
149152
}

0 commit comments

Comments (0)