From e726210ae0f9897ff30701bff624ac9594e886a1 Mon Sep 17 00:00:00 2001 From: Berenguer Blasi Date: Fri, 29 Aug 2025 10:28:28 +0200 Subject: [PATCH 1/2] HCD-181 AssertionError from CassandraStreamReader --- .../apache/cassandra/db/streaming/CassandraStreamReader.java | 2 +- .../org/apache/cassandra/repair/consistent/LocalSessions.java | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/java/org/apache/cassandra/db/streaming/CassandraStreamReader.java b/src/java/org/apache/cassandra/db/streaming/CassandraStreamReader.java index ad5e4cb6b9a9..3a8a9b363f79 100644 --- a/src/java/org/apache/cassandra/db/streaming/CassandraStreamReader.java +++ b/src/java/org/apache/cassandra/db/streaming/CassandraStreamReader.java @@ -86,7 +86,7 @@ public CassandraStreamReader(StreamMessageHeader header, CassandraStreamHeader s { // we should only ever be streaming pending repair // sstables if the session has a pending repair id - assert session.getPendingRepair().equals(header.pendingRepair); + assert session.getPendingRepair().equals(header.pendingRepair) : session.getPendingRepair() + " != " + header.pendingRepair; } this.session = session; this.tableId = header.tableId; diff --git a/src/java/org/apache/cassandra/repair/consistent/LocalSessions.java b/src/java/org/apache/cassandra/repair/consistent/LocalSessions.java index 082dc37f64a4..11611cb84466 100644 --- a/src/java/org/apache/cassandra/repair/consistent/LocalSessions.java +++ b/src/java/org/apache/cassandra/repair/consistent/LocalSessions.java @@ -791,8 +791,8 @@ void setStateAndSave(LocalSession session, ConsistentSession.State state) synchronized (session) { Preconditions.checkArgument(session.getState().canTransitionTo(state), - "Invalid state transition %s -> %s", - session.getState(), state); + "Invalid state transition %s -> %s for session %s", + session.getState(), state, session.sessionID); logger.trace("Changing LocalSession state from {} -> {} for {}", session.getState(), state, session.sessionID); boolean wasCompleted = session.isCompleted(); session.setState(state); From 88a1b57d0f61231b3f7dd8d6073417e1f4b7a84a Mon Sep 17 00:00:00 2001 From: Berenguer Blasi Date: Wed, 3 Sep 2025 14:16:33 +0200 Subject: [PATCH 2/2] Enhanced diags --- .../apache/cassandra/repair/RepairMessageVerbHandler.java | 6 +++++- .../cassandra/repair/consistent/CoordinatorSession.java | 3 ++- .../apache/cassandra/repair/consistent/LocalSessions.java | 6 ++++-- .../org/apache/cassandra/service/ActiveRepairService.java | 3 +++ 4 files changed, 14 insertions(+), 4 deletions(-) diff --git a/src/java/org/apache/cassandra/repair/RepairMessageVerbHandler.java b/src/java/org/apache/cassandra/repair/RepairMessageVerbHandler.java index 8a16089c3cf7..8d532096948b 100644 --- a/src/java/org/apache/cassandra/repair/RepairMessageVerbHandler.java +++ b/src/java/org/apache/cassandra/repair/RepairMessageVerbHandler.java @@ -223,9 +223,13 @@ else if (message.verb() == STATUS_RSP) } catch (Exception e) { - logger.error("Got error processing {}, removing parent repair session", message.verb()); if (desc != null && desc.parentSessionId != null) + { + logger.error("Got error processing {}, removing parent repair session {}", message.verb(), desc.parentSessionId); ActiveRepairService.instance.removeParentRepairSession(desc.parentSessionId); + } + else + logger.error("Got error processing {}, removing parent repair session", message.verb()); throw new RuntimeException(e); } } diff --git a/src/java/org/apache/cassandra/repair/consistent/CoordinatorSession.java b/src/java/org/apache/cassandra/repair/consistent/CoordinatorSession.java index 5ddac3f74535..3b7971fdd916 100644 --- a/src/java/org/apache/cassandra/repair/consistent/CoordinatorSession.java +++ b/src/java/org/apache/cassandra/repair/consistent/CoordinatorSession.java @@ -37,6 +37,7 @@ import com.google.common.util.concurrent.ListenableFuture; import com.google.common.util.concurrent.MoreExecutors; import com.google.common.util.concurrent.SettableFuture; +import org.apache.commons.lang3.exception.ExceptionUtils; import org.apache.commons.lang3.time.DurationFormatUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -274,7 +275,7 @@ public synchronized void fail() logger.error("Can't transition endpoints {} to FAILED", cantFail, new RuntimeException()); return; } - logger.info("Incremental repair session {} failed", sessionID); + logger.info("Incremental repair session {} failed \n {}", sessionID, ExceptionUtils.getStackTrace(new Exception())); sendFailureMessageToParticipants(); setAll(State.FAILED); diff --git a/src/java/org/apache/cassandra/repair/consistent/LocalSessions.java b/src/java/org/apache/cassandra/repair/consistent/LocalSessions.java index 11611cb84466..18ef44e8e2b6 100644 --- a/src/java/org/apache/cassandra/repair/consistent/LocalSessions.java +++ b/src/java/org/apache/cassandra/repair/consistent/LocalSessions.java @@ -55,6 +55,8 @@ import com.google.common.util.concurrent.ListenableFuture; import com.google.common.util.concurrent.MoreExecutors; +import org.apache.commons.lang3.exception.ExceptionUtils; + import org.apache.cassandra.cql3.PageSize; import org.apache.cassandra.db.compaction.AbstractCompactionTask; import org.apache.cassandra.db.compaction.CleanupTask; @@ -417,7 +419,7 @@ public boolean canCleanup(UUID sessionID) */ public void cancelSession(UUID sessionID, boolean force) { - logger.info("Cancelling local repair session {}", sessionID); + logger.info("Cancelling local repair session {} \n {}", sessionID, ExceptionUtils.getStackTrace(new Exception())); LocalSession session = getSession(sessionID); Preconditions.checkArgument(session != null, "Session {} does not exist", sessionID); Preconditions.checkArgument(force || session.coordinator.equals(getBroadcastAddressAndPort()), @@ -831,7 +833,7 @@ public void failSession(LocalSession session, boolean sendMessage) } else if (session.getState() != FAILED) { - logger.info("Failing local repair session {}", session.sessionID); + logger.info("Failing local repair session {} \n {}", session.sessionID, ExceptionUtils.getStackTrace(new Exception())); setStateAndSave(session, FAILED); } } diff --git a/src/java/org/apache/cassandra/service/ActiveRepairService.java b/src/java/org/apache/cassandra/service/ActiveRepairService.java index 22925958e30d..2f48359501d0 100644 --- a/src/java/org/apache/cassandra/service/ActiveRepairService.java +++ b/src/java/org/apache/cassandra/service/ActiveRepairService.java @@ -37,6 +37,8 @@ import com.google.common.util.concurrent.ListeningExecutorService; import com.google.common.util.concurrent.MoreExecutors; +import org.apache.commons.lang3.exception.ExceptionUtils; + import org.apache.cassandra.concurrent.DebuggableThreadPoolExecutor; import org.apache.cassandra.db.compaction.CompactionManager; import org.apache.cassandra.locator.EndpointsByRange; @@ -413,6 +415,7 @@ public void run() public synchronized void terminateSessions() { + logger.info("Terminating repair sessions \n {}", ExceptionUtils.getStackTrace(new Exception())); Throwable cause = new IOException("Terminate session is called"); for (RepairSession session : sessions.values()) {