Skip to content

Commit 0b7b7fd

Browse files
liancheng authored and pwendell committed
[SPARK-1194] Fix the same-RDD rule for cache replacement
SPARK-1194: https://spark-project.atlassian.net/browse/SPARK-1194. In the current implementation, when selecting candidate blocks to be swapped out, once we find a block from the same RDD that the block to be stored belongs to, cache eviction fails and aborts. In this PR, we keep selecting blocks *not* from the RDD that the block to be stored belongs to until either enough free space can be ensured (cache eviction succeeds) or all such blocks are checked (cache eviction fails). Author: Cheng Lian <[email protected]>. Closes #96 from liancheng/fix-spark-1194 and squashes the following commits: 2524ab9 [Cheng Lian] Added regression test case for SPARK-1194; 6e40c22 [Cheng Lian] Remove redundant comments; 40cdcb2 [Cheng Lian] Bug fix, and addressed PR comments from @mridulm; 62c92ac [Cheng Lian] Fixed SPARK-1194 https://spark-project.atlassian.net/browse/SPARK-1194
1 parent 8ad486a commit 0b7b7fd

File tree

2 files changed

+19
-6
lines changed

2 files changed

+19
-6
lines changed

core/src/main/scala/org/apache/spark/storage/MemoryStore.scala

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -236,13 +236,10 @@ private class MemoryStore(blockManager: BlockManager, maxMemory: Long)
236236
while (maxMemory - (currentMemory - selectedMemory) < space && iterator.hasNext) {
237237
val pair = iterator.next()
238238
val blockId = pair.getKey
239-
if (rddToAdd.isDefined && rddToAdd == getRddId(blockId)) {
240-
logInfo("Will not store " + blockIdToAdd + " as it would require dropping another " +
241-
"block from the same RDD")
242-
return false
239+
if (rddToAdd.isEmpty || rddToAdd != getRddId(blockId)) {
240+
selectedBlocks += blockId
241+
selectedMemory += pair.getValue.size
243242
}
244-
selectedBlocks += blockId
245-
selectedMemory += pair.getValue.size
246243
}
247244
}
248245

@@ -264,6 +261,8 @@ private class MemoryStore(blockManager: BlockManager, maxMemory: Long)
264261
}
265262
return true
266263
} else {
264+
logInfo(s"Will not store $blockIdToAdd as it would require dropping another block " +
265+
"from the same RDD")
267266
return false
268267
}
269268
}

core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -662,4 +662,18 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT
662662
assert(store.getSingle("a1") == None, "a1 should not be in store")
663663
}
664664
}
665+
666+
test("SPARK-1194 regression: fix the same-RDD rule for cache replacement") {
667+
store = new BlockManager("<driver>", actorSystem, master, serializer, 1200, conf, securityMgr)
668+
store.putSingle(rdd(0, 0), new Array[Byte](400), StorageLevel.MEMORY_ONLY)
669+
store.putSingle(rdd(1, 0), new Array[Byte](400), StorageLevel.MEMORY_ONLY)
670+
// Access rdd_1_0 to ensure it's not least recently used.
671+
assert(store.getSingle(rdd(1, 0)).isDefined, "rdd_1_0 was not in store")
672+
// According to the same-RDD rule, rdd_1_0 should be replaced here.
673+
store.putSingle(rdd(0, 1), new Array[Byte](400), StorageLevel.MEMORY_ONLY)
674+
// rdd_1_0 should have been replaced, even though it's not the least recently used block.
675+
assert(store.memoryStore.contains(rdd(0, 0)), "rdd_0_0 was not in store")
676+
assert(store.memoryStore.contains(rdd(0, 1)), "rdd_0_1 was not in store")
677+
assert(!store.memoryStore.contains(rdd(1, 0)), "rdd_1_0 was in store")
678+
}
665679
}

0 commit comments

Comments
 (0)