Skip to content

Commit c1a9e89

Browse files
ddupgsunxin
authored and committed
HBASE-25562 ReplicationSourceWALReader log and handle exception immediately without retrying (#2943)
Signed-off-by: Wellington Chevreuil <[email protected]> Signed-off-by: stack <[email protected]> Signed-off-by: shahrs87
1 parent 627ac01 commit c1a9e89

File tree

1 file changed

+17
-13
lines changed

1 file changed

+17
-13
lines changed

hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceWALReader.java

Lines changed: 17 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -150,14 +150,13 @@ public void run() {
150150
}
151151
}
152152
} catch (IOException e) { // stream related
153-
if (sleepMultiplier < maxRetriesMultiplier) {
154-
LOG.debug("Failed to read stream of replication entries: " + e);
155-
sleepMultiplier++;
156-
} else {
157-
LOG.error("Failed to read stream of replication entries", e);
158-
handleEofException(e);
153+
if (!handleEofException(e)) {
154+
LOG.warn("Failed to read stream of replication entries", e);
155+
if (sleepMultiplier < maxRetriesMultiplier) {
156+
sleepMultiplier ++;
157+
}
158+
Threads.sleep(sleepForRetries * sleepMultiplier);
159159
}
160-
Threads.sleep(sleepForRetries * sleepMultiplier);
161160
} catch (InterruptedException e) {
162161
LOG.trace("Interrupted while sleeping between WAL reads");
163162
Thread.currentThread().interrupt();
@@ -244,25 +243,30 @@ private void handleEmptyWALEntryBatch(Path currentPath) throws InterruptedExcept
244243
}
245244
}
246245

247-
// if we get an EOF due to a zero-length log, and there are other logs in queue
248-
// (highly likely we've closed the current log), we've hit the max retries, and autorecovery is
249-
// enabled, then dump the log
250-
private void handleEofException(IOException e) {
246+
/**
247+
* if we get an EOF due to a zero-length log, and there are other logs in queue
248+
* (highly likely we've closed the current log), and autorecovery is
249+
* enabled, then dump the log
250+
* @return true only if the IOE can be handled
251+
*/
252+
private boolean handleEofException(IOException e) {
251253
PriorityBlockingQueue<Path> queue = logQueue.getQueue(walGroupId);
252254
// Dump the log even if logQueue size is 1 if the source is from recovered Source
253255
// since we don't add current log to recovered source queue so it is safe to remove.
254256
if ((e instanceof EOFException || e.getCause() instanceof EOFException) &&
255257
(source.isRecovered() || queue.size() > 1) && this.eofAutoRecovery) {
256258
try {
257259
if (fs.getFileStatus(queue.peek()).getLen() == 0) {
258-
LOG.warn("Forcing removal of 0 length log in queue: " + queue.peek());
260+
LOG.warn("Forcing removal of 0 length log in queue: {}", queue.peek());
259261
logQueue.remove(walGroupId);
260262
currentPosition = 0;
263+
return true;
261264
}
262265
} catch (IOException ioe) {
263-
LOG.warn("Couldn't get file length information about log " + queue.peek());
266+
LOG.warn("Couldn't get file length information about log {}", queue.peek());
264267
}
265268
}
269+
return false;
266270
}
267271

268272
public Path getCurrentPath() {

0 commit comments

Comments
 (0)