
Commit ccd4436

tdas authored and jeanlyn committed
[SPARK-7139] [STREAMING] Allow received block metadata to be saved to WAL and recovered on driver failure
- Enabled ReceivedBlockTracker WAL by default
- Stored block metadata in the WAL
- Optimized WALBackedBlockRDD by skipping block fetch when the block is known to not exist in Spark

Author: Tathagata Das <[email protected]>

Closes apache#5732 from tdas/SPARK-7139 and squashes the following commits:

575476e [Tathagata Das] Added more tests to get 100% coverage of the WALBackedBlockRDD
19668ba [Tathagata Das] Merge remote-tracking branch 'apache-github/master' into SPARK-7139
685fab3 [Tathagata Das] Addressed comments in PR
637bc9c [Tathagata Das] Changed segment to handle
466212c [Tathagata Das] Merge remote-tracking branch 'apache-github/master' into SPARK-7139
5f67a59 [Tathagata Das] Fixed HdfsUtils to handle append in local file system
1bc5bc3 [Tathagata Das] Fixed bug on unexpected recovery
d06fa21 [Tathagata Das] Enabled ReceivedBlockTracker by default, stored block metadata and optimized block fetching in WALBackedBlockRDD
1 parent 742fef7 commit ccd4436

File tree: 10 files changed, +281 -153 lines

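The headline change is that the driver-side ReceivedBlockTracker now writes received-block metadata to the write ahead log whenever the receiver WAL is enabled, so batches can be reconstructed after a driver failure. A minimal, hedged sketch of the streaming setup that exercises this path (the application name, batch interval, and checkpoint path are illustrative assumptions):

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

val conf = new SparkConf()
  .setAppName("wal-recovery-example")  // illustrative name
  // Turns on the receiver write ahead log; with this commit the driver-side
  // ReceivedBlockTracker WAL is used along with it.
  .set("spark.streaming.receiver.writeAheadLog.enable", "true")

val ssc = new StreamingContext(conf, Seconds(1))
// The checkpoint directory is where the WAL and block metadata are persisted,
// so it must survive a driver restart (typically HDFS or another reliable store).
ssc.checkpoint("hdfs:///tmp/streaming-checkpoint")  // illustrative path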

core/src/main/scala/org/apache/spark/rdd/BlockRDD.scala

Lines changed: 4 additions & 4 deletions
@@ -31,7 +31,7 @@ private[spark]
 class BlockRDD[T: ClassTag](@transient sc: SparkContext, @transient val blockIds: Array[BlockId])
   extends RDD[T](sc, Nil) {

-  @transient lazy val locations_ = BlockManager.blockIdsToHosts(blockIds, SparkEnv.get)
+  @transient lazy val _locations = BlockManager.blockIdsToHosts(blockIds, SparkEnv.get)
   @volatile private var _isValid = true

   override def getPartitions: Array[Partition] = {
@@ -54,7 +54,7 @@ class BlockRDD[T: ClassTag](@transient sc: SparkContext, @transient val blockIds

   override def getPreferredLocations(split: Partition): Seq[String] = {
     assertValid()
-    locations_(split.asInstanceOf[BlockRDDPartition].blockId)
+    _locations(split.asInstanceOf[BlockRDDPartition].blockId)
   }

   /**
@@ -79,14 +79,14 @@ class BlockRDD[T: ClassTag](@transient sc: SparkContext, @transient val blockIds

   /** Check if this BlockRDD is valid. If not valid, exception is thrown. */
   private[spark] def assertValid() {
-    if (!_isValid) {
+    if (!isValid) {
       throw new SparkException(
         "Attempted to use %s after its blocks have been removed!".format(toString))
     }
   }

   protected def getBlockIdLocations(): Map[BlockId, Seq[String]] = {
-    locations_
+    _locations
   }
 }
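Note that assertValid() now checks the overridable isValid method instead of the private _isValid flag, so a subclass can redefine what "valid" means. A minimal hedged sketch of that pattern (the subclass and package names are illustrative; the WriteAheadLogBackedBlockRDD changed later in this commit uses the same trick):

// Sketch only: must live under the org.apache.spark package tree, because
// BlockRDD and isValid() are private[spark].
package org.apache.spark.rdd.example

import scala.reflect.ClassTag

import org.apache.spark.SparkContext
import org.apache.spark.rdd.BlockRDD
import org.apache.spark.storage.BlockId

// An RDD whose data is also persisted outside the block manager can declare
// itself always valid, so assertValid() never throws even after the
// in-memory blocks have been removed.
class ExternallyBackedBlockRDD[T: ClassTag](sc: SparkContext, ids: Array[BlockId])
  extends BlockRDD[T](sc, ids) {

  override def isValid(): Boolean = true
}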

streaming/src/main/scala/org/apache/spark/streaming/dstream/ReceiverInputDStream.scala

Lines changed: 24 additions & 25 deletions
@@ -20,11 +20,11 @@ package org.apache.spark.streaming.dstream
 import scala.reflect.ClassTag

 import org.apache.spark.rdd.{BlockRDD, RDD}
-import org.apache.spark.storage.{BlockId, StorageLevel}
+import org.apache.spark.storage.BlockId
 import org.apache.spark.streaming._
 import org.apache.spark.streaming.rdd.WriteAheadLogBackedBlockRDD
-import org.apache.spark.streaming.receiver.{Receiver, WriteAheadLogBasedStoreResult}
-import org.apache.spark.streaming.scheduler.{InputInfo, ReceivedBlockInfo}
+import org.apache.spark.streaming.receiver.Receiver
+import org.apache.spark.streaming.util.WriteAheadLogUtils

 /**
  * Abstract class for defining any [[org.apache.spark.streaming.dstream.InputDStream]]
@@ -64,31 +64,30 @@ abstract class ReceiverInputDStream[T: ClassTag](@transient ssc_ : StreamingCont
     } else {
       // Otherwise, ask the tracker for all the blocks that have been allocated to this stream
       // for this batch
-      val blockInfos =
-        ssc.scheduler.receiverTracker.getBlocksOfBatch(validTime).get(id).getOrElse(Seq.empty)
-      val blockStoreResults = blockInfos.map { _.blockStoreResult }
-      val blockIds = blockStoreResults.map { _.blockId.asInstanceOf[BlockId] }.toArray
+      val receiverTracker = ssc.scheduler.receiverTracker
+      val blockInfos = receiverTracker.getBlocksOfBatch(validTime).getOrElse(id, Seq.empty)
+      val blockIds = blockInfos.map { _.blockId.asInstanceOf[BlockId] }.toArray

-      // Register the input blocks information into InputInfoTracker
-      val inputInfo = InputInfo(id, blockInfos.map(_.numRecords).sum)
-      ssc.scheduler.inputInfoTracker.reportInfo(validTime, inputInfo)
+      // Are WAL record handles present with all the blocks
+      val areWALRecordHandlesPresent = blockInfos.forall { _.walRecordHandleOption.nonEmpty }

-      // Check whether all the results are of the same type
-      val resultTypes = blockStoreResults.map { _.getClass }.distinct
-      if (resultTypes.size > 1) {
-        logWarning("Multiple result types in block information, WAL information will be ignored.")
-      }
-
-      // If all the results are of type WriteAheadLogBasedStoreResult, then create
-      // WriteAheadLogBackedBlockRDD else create simple BlockRDD.
-      if (resultTypes.size == 1 && resultTypes.head == classOf[WriteAheadLogBasedStoreResult]) {
-        val logSegments = blockStoreResults.map {
-          _.asInstanceOf[WriteAheadLogBasedStoreResult].walRecordHandle
-        }.toArray
-        // Since storeInBlockManager = false, the storage level does not matter.
-        new WriteAheadLogBackedBlockRDD[T](ssc.sparkContext,
-          blockIds, logSegments, storeInBlockManager = false, StorageLevel.MEMORY_ONLY_SER)
+      if (areWALRecordHandlesPresent) {
+        // If all the blocks have WAL record handle, then create a WALBackedBlockRDD
+        val isBlockIdValid = blockInfos.map { _.isBlockIdValid() }.toArray
+        val walRecordHandles = blockInfos.map { _.walRecordHandleOption.get }.toArray
+        new WriteAheadLogBackedBlockRDD[T](
+          ssc.sparkContext, blockIds, walRecordHandles, isBlockIdValid)
       } else {
+        // Else, create a BlockRDD. However, if there are some blocks with WAL info but not others
+        // then that is unexpected and log a warning accordingly.
+        if (blockInfos.find(_.walRecordHandleOption.nonEmpty).nonEmpty) {
+          if (WriteAheadLogUtils.enableReceiverLog(ssc.conf)) {
+            logError("Some blocks do not have Write Ahead Log information; " +
+              "this is unexpected and data may not be recoverable after driver failures")
+          } else {
+            logWarning("Some blocks have Write Ahead Log information; this is unexpected")
+          }
+        }
         new BlockRDD[T](ssc.sc, blockIds)
       }
     }
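The new branch above reduces to one question per batch: does every received block carry a WAL record handle? A small standalone sketch of that decision (illustrative names, not part of the Spark API):

// Illustrative only: mirrors the forall check used above.
def chooseBatchRDD(walHandles: Seq[Option[String]]): String = {
  if (walHandles.forall(_.nonEmpty)) {
    // Every block is recoverable from the write ahead log.
    "WriteAheadLogBackedBlockRDD"
  } else {
    // At least one block has no WAL handle; fall back to executor blocks only
    // (a mixed batch additionally triggers the warning/error shown above).
    "BlockRDD"
  }
}

chooseBatchRDD(Seq(Some("h1"), Some("h2")))  // => "WriteAheadLogBackedBlockRDD"
chooseBatchRDD(Seq(Some("h1"), None))        // => "BlockRDD"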

streaming/src/main/scala/org/apache/spark/streaming/rdd/WriteAheadLogBackedBlockRDD.scala

Lines changed: 98 additions & 58 deletions
@@ -23,6 +23,8 @@ import java.util.UUID
 import scala.reflect.ClassTag
 import scala.util.control.NonFatal

+import org.apache.commons.io.FileUtils
+
 import org.apache.spark._
 import org.apache.spark.rdd.BlockRDD
 import org.apache.spark.storage.{BlockId, StorageLevel}
@@ -31,30 +33,42 @@ import org.apache.spark.streaming.util._
 /**
  * Partition class for [[org.apache.spark.streaming.rdd.WriteAheadLogBackedBlockRDD]].
  * It contains information about the id of the blocks having this partition's data and
- * the segment of the write ahead log that backs the partition.
+ * the corresponding record handle in the write ahead log that backs the partition.
  * @param index index of the partition
  * @param blockId id of the block having the partition data
+ * @param isBlockIdValid Whether the block Ids are valid (i.e., the blocks are present in the Spark
+ *                       executors). If not, then block lookups by the block ids will be skipped.
+ *                       By default, this is an empty array signifying true for all the blocks.
  * @param walRecordHandle Handle of the record in a write ahead log having the partition data
  */
 private[streaming]
 class WriteAheadLogBackedBlockRDDPartition(
     val index: Int,
     val blockId: BlockId,
-    val walRecordHandle: WriteAheadLogRecordHandle)
-  extends Partition
+    val isBlockIdValid: Boolean,
+    val walRecordHandle: WriteAheadLogRecordHandle
+  ) extends Partition


 /**
  * This class represents a special case of the BlockRDD where the data blocks in
  * the block manager are also backed by data in write ahead logs. For reading
  * the data, this RDD first looks up the blocks by their ids in the block manager.
- * If it does not find them, it looks up the corresponding data in the write ahead log.
+ * If it does not find them, it looks up the WAL using the corresponding record handle.
+ * The lookup of the blocks from the block manager can be skipped by setting the corresponding
+ * element in isBlockIdValid to false. This is a performance optimization which does not affect
+ * correctness, and it can be used in situations where it is known that the block
+ * does not exist in the Spark executors (e.g. after a failed driver is restarted).
+ *
  *
  * @param sc SparkContext
  * @param blockIds Ids of the blocks that contains this RDD's data
  * @param walRecordHandles Record handles in write ahead logs that contain this RDD's data
- * @param storeInBlockManager Whether to store in the block manager after reading
- *                            from the WAL record
+ * @param isBlockIdValid Whether the block Ids are valid (i.e., the blocks are present in the Spark
+ *                       executors). If not, then block lookups by the block ids will be skipped.
+ *                       By default, this is an empty array signifying true for all the blocks.
+ * @param storeInBlockManager Whether to store a block in the block manager
+ *                            after reading it from the WAL
  * @param storageLevel storage level to store when storing in block manager
  *                     (applicable when storeInBlockManager = true)
  */
@@ -63,23 +77,32 @@ class WriteAheadLogBackedBlockRDD[T: ClassTag](
     @transient sc: SparkContext,
     @transient blockIds: Array[BlockId],
     @transient walRecordHandles: Array[WriteAheadLogRecordHandle],
-    storeInBlockManager: Boolean,
-    storageLevel: StorageLevel)
+    @transient isBlockIdValid: Array[Boolean] = Array.empty,
+    storeInBlockManager: Boolean = false,
+    storageLevel: StorageLevel = StorageLevel.MEMORY_ONLY_SER)
   extends BlockRDD[T](sc, blockIds) {

   require(
     blockIds.length == walRecordHandles.length,
-    s"Number of block ids (${blockIds.length}) must be " +
-      s"the same as number of WAL record handles (${walRecordHandles.length}})!")
+    s"Number of block Ids (${blockIds.length}) must be " +
+      s" same as number of WAL record handles (${walRecordHandles.length}})")
+
+  require(
+    isBlockIdValid.isEmpty || isBlockIdValid.length == blockIds.length,
+    s"Number of elements in isBlockIdValid (${isBlockIdValid.length}) must be " +
+      s" same as number of block Ids (${blockIds.length})")

   // Hadoop configuration is not serializable, so broadcast it as a serializable.
   @transient private val hadoopConfig = sc.hadoopConfiguration
   private val broadcastedHadoopConf = new SerializableWritable(hadoopConfig)

+  override def isValid(): Boolean = true
+
   override def getPartitions: Array[Partition] = {
     assertValid()
-    Array.tabulate(blockIds.size) { i =>
-      new WriteAheadLogBackedBlockRDDPartition(i, blockIds(i), walRecordHandles(i))
+    Array.tabulate(blockIds.length) { i =>
+      val isValid = if (isBlockIdValid.length == 0) true else isBlockIdValid(i)
+      new WriteAheadLogBackedBlockRDDPartition(i, blockIds(i), isValid, walRecordHandles(i))
     }
   }

@@ -94,51 +117,57 @@ class WriteAheadLogBackedBlockRDD[T: ClassTag](
     val blockManager = SparkEnv.get.blockManager
     val partition = split.asInstanceOf[WriteAheadLogBackedBlockRDDPartition]
     val blockId = partition.blockId
-    blockManager.get(blockId) match {
-      case Some(block) => // Data is in Block Manager
-        val iterator = block.data.asInstanceOf[Iterator[T]]
-        logDebug(s"Read partition data of $this from block manager, block $blockId")
-        iterator
-      case None => // Data not found in Block Manager, grab it from write ahead log file
-        var dataRead: ByteBuffer = null
-        var writeAheadLog: WriteAheadLog = null
-        try {
-          // The WriteAheadLogUtils.createLog*** method needs a directory to create a
-          // WriteAheadLog object as the default FileBasedWriteAheadLog needs a directory for
-          // writing log data. However, the directory is not needed if data needs to be read, hence
-          // a dummy path is provided to satisfy the method parameter requirements.
-          // FileBasedWriteAheadLog will not create any file or directory at that path.
-          // FileBasedWriteAheadLog will not create any file or directory at that path. Also,
-          // this dummy directory should not already exist otherwise the WAL will try to recover
-          // past events from the directory and throw errors.
-          val nonExistentDirectory = new File(
-            System.getProperty("java.io.tmpdir"), UUID.randomUUID().toString).getAbsolutePath
-          writeAheadLog = WriteAheadLogUtils.createLogForReceiver(
-            SparkEnv.get.conf, nonExistentDirectory, hadoopConf)
-          dataRead = writeAheadLog.read(partition.walRecordHandle)
-        } catch {
-          case NonFatal(e) =>
-            throw new SparkException(
-              s"Could not read data from write ahead log record ${partition.walRecordHandle}", e)
-        } finally {
-          if (writeAheadLog != null) {
-            writeAheadLog.close()
-            writeAheadLog = null
-          }
-        }
-        if (dataRead == null) {
-          throw new SparkException(
-            s"Could not read data from write ahead log record ${partition.walRecordHandle}, " +
-              s"read returned null")
-        }
-        logInfo(s"Read partition data of $this from write ahead log, record handle " +
-          partition.walRecordHandle)
-        if (storeInBlockManager) {
-          blockManager.putBytes(blockId, dataRead, storageLevel)
-          logDebug(s"Stored partition data of $this into block manager with level $storageLevel")
-          dataRead.rewind()
-        }
-        blockManager.dataDeserialize(blockId, dataRead).asInstanceOf[Iterator[T]]
+
+    def getBlockFromBlockManager(): Option[Iterator[T]] = {
+      blockManager.get(blockId).map(_.data.asInstanceOf[Iterator[T]])
+    }
+
+    def getBlockFromWriteAheadLog(): Iterator[T] = {
+      var dataRead: ByteBuffer = null
+      var writeAheadLog: WriteAheadLog = null
+      try {
+        // The WriteAheadLogUtils.createLog*** method needs a directory to create a
+        // WriteAheadLog object as the default FileBasedWriteAheadLog needs a directory for
+        // writing log data. However, the directory is not needed if data needs to be read, hence
+        // a dummy path is provided to satisfy the method parameter requirements.
+        // FileBasedWriteAheadLog will not create any file or directory at that path.
+        // FileBasedWriteAheadLog will not create any file or directory at that path. Also,
+        // this dummy directory should not already exist otherwise the WAL will try to recover
+        // past events from the directory and throw errors.
+        val nonExistentDirectory = new File(
+          System.getProperty("java.io.tmpdir"), UUID.randomUUID().toString).getAbsolutePath
+        writeAheadLog = WriteAheadLogUtils.createLogForReceiver(
+          SparkEnv.get.conf, nonExistentDirectory, hadoopConf)
+        dataRead = writeAheadLog.read(partition.walRecordHandle)
+      } catch {
+        case NonFatal(e) =>
+          throw new SparkException(
+            s"Could not read data from write ahead log record ${partition.walRecordHandle}", e)
+      } finally {
+        if (writeAheadLog != null) {
+          writeAheadLog.close()
+          writeAheadLog = null
+        }
+      }
+      if (dataRead == null) {
+        throw new SparkException(
+          s"Could not read data from write ahead log record ${partition.walRecordHandle}, " +
+            s"read returned null")
+      }
+      logInfo(s"Read partition data of $this from write ahead log, record handle " +
+        partition.walRecordHandle)
+      if (storeInBlockManager) {
+        blockManager.putBytes(blockId, dataRead, storageLevel)
+        logDebug(s"Stored partition data of $this into block manager with level $storageLevel")
+        dataRead.rewind()
+      }
+      blockManager.dataDeserialize(blockId, dataRead).asInstanceOf[Iterator[T]]
+    }
+
+    if (partition.isBlockIdValid) {
+      getBlockFromBlockManager().getOrElse { getBlockFromWriteAheadLog() }
+    } else {
+      getBlockFromWriteAheadLog()
     }
   }

@@ -149,12 +178,23 @@ class WriteAheadLogBackedBlockRDD[T: ClassTag](
   */
   override def getPreferredLocations(split: Partition): Seq[String] = {
     val partition = split.asInstanceOf[WriteAheadLogBackedBlockRDDPartition]
-    val blockLocations = getBlockIdLocations().get(partition.blockId)
+    val blockLocations = if (partition.isBlockIdValid) {
+      getBlockIdLocations().get(partition.blockId)
+    } else {
+      None
+    }
+
     blockLocations.getOrElse {
       partition.walRecordHandle match {
         case fileSegment: FileBasedWriteAheadLogSegment =>
-          HdfsUtils.getFileSegmentLocations(
-            fileSegment.path, fileSegment.offset, fileSegment.length, hadoopConfig)
+          try {
+            HdfsUtils.getFileSegmentLocations(
+              fileSegment.path, fileSegment.offset, fileSegment.length, hadoopConfig)
+          } catch {
+            case NonFatal(e) =>
+              logError("Error getting WAL file segment locations", e)
+              Seq.empty
+          }
         case _ =>
           Seq.empty
       }
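The refactored compute() above boils down to an Option-based fallback: try the block manager only when the block is known to be present, otherwise go straight to the WAL. A tiny standalone sketch of that pattern (illustrative names, not Spark API):

// Illustrative only: mirrors the compute() fallback above.
def readPartition[T](
    blockIsKnownPresent: Boolean,                    // partition.isBlockIdValid
    fromBlockManager: () => Option[Iterator[T]],
    fromWriteAheadLog: () => Iterator[T]): Iterator[T] = {
  if (blockIsKnownPresent) {
    // Try the fast in-memory path first, fall back to the WAL on a miss.
    fromBlockManager().getOrElse(fromWriteAheadLog())
  } else {
    // Known to be absent from the executors (e.g. after a driver restart):
    // skip the block manager lookup entirely and read from the WAL.
    fromWriteAheadLog()
  }
}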

streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisorImpl.scala

Lines changed: 1 addition & 1 deletion
@@ -146,7 +146,7 @@ private[streaming] class ReceiverSupervisorImpl(
     val blockStoreResult = receivedBlockHandler.storeBlock(blockId, receivedBlock)
     logDebug(s"Pushed block $blockId in ${(System.currentTimeMillis - time)} ms")

-    val blockInfo = ReceivedBlockInfo(streamId, numRecords, blockStoreResult)
+    val blockInfo = ReceivedBlockInfo(streamId, numRecords, metadataOption, blockStoreResult)
     trackerEndpoint.askWithRetry[Boolean](AddBlock(blockInfo))
     logDebug(s"Reported block $blockId")
   }
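The metadataOption threaded through here originates in the receiver's store(...) overloads that accept a metadata value. A hedged sketch of a custom receiver that attaches metadata to a stored block (the data and metadata payloads are illustrative):

import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.receiver.Receiver

class MetadataTaggingReceiver extends Receiver[String](StorageLevel.MEMORY_AND_DISK_SER) {

  def onStart(): Unit = {
    // A real receiver would pull from an external source on its own thread;
    // this sketch just stores one block with attached metadata.
    val records = Iterator("record-1", "record-2", "record-3")
    // The metadata (second argument) ends up in ReceivedBlockInfo.metadataOption
    // and, with this commit, is written to the write ahead log on the driver.
    store(records, Map("source" -> "example-source", "startOffset" -> 42))
  }

  def onStop(): Unit = { }
}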

streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceivedBlockInfo.scala

Lines changed: 28 additions & 2 deletions
@@ -17,12 +17,38 @@

 package org.apache.spark.streaming.scheduler

-import org.apache.spark.streaming.receiver.ReceivedBlockStoreResult
+import org.apache.spark.storage.StreamBlockId
+import org.apache.spark.streaming.receiver.{ReceivedBlockStoreResult, WriteAheadLogBasedStoreResult}
+import org.apache.spark.streaming.util.WriteAheadLogRecordHandle

 /** Information about blocks received by the receiver */
 private[streaming] case class ReceivedBlockInfo(
     streamId: Int,
     numRecords: Long,
+    metadataOption: Option[Any],
     blockStoreResult: ReceivedBlockStoreResult
-  )
+  ) {
+
+  @volatile private var _isBlockIdValid = true
+
+  def blockId: StreamBlockId = blockStoreResult.blockId
+
+  def walRecordHandleOption: Option[WriteAheadLogRecordHandle] = {
+    blockStoreResult match {
+      case walStoreResult: WriteAheadLogBasedStoreResult => Some(walStoreResult.walRecordHandle)
+      case _ => None
+    }
+  }
+
+  /** Is the block ID valid, that is, is the block present in the Spark executors. */
+  def isBlockIdValid(): Boolean = _isBlockIdValid
+
+  /**
+   * Set the block ID as invalid. This is useful when it is known that the block is not present
+   * in the Spark executors.
+   */
+  def setBlockIdInvalid(): Unit = {
+    _isBlockIdValid = false
+  }
+}
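After a driver restart, block infos replayed from the WAL refer to executor blocks that no longer exist, so the tracker marks them invalid before batches are recomputed, and the WAL-backed RDD then skips the block manager lookup entirely. A hedged sketch of that intended usage (the helper name is illustrative, not part of the tracker's API):

// Illustrative only: how the new accessors and mutator fit together on recovery.
def markRecoveredBlocks(recoveredInfos: Seq[ReceivedBlockInfo]): Unit = {
  recoveredInfos.foreach { info =>
    // The executors died with the old driver, so the blocks cannot be fetched
    // by id any more; force reads to go through the WAL record handles.
    info.setBlockIdInvalid()
  }
}

// Downstream, ReceiverInputDStream consumes the same flags:
//   info.isBlockIdValid()       => false for recovered blocks
//   info.walRecordHandleOption  => Some(handle) when stored via the WAL handler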

0 commit comments
