@@ -250,8 +250,24 @@ public void regionServerReport(ServerName sn, ServerMetrics sl) throws YouAreDea
250250 // the ServerName to use. Here we presume a master has already done
251251 // that so we'll press on with whatever it gave us for ServerName.
252252 if (!checkAndRecordNewServer (sn , sl )) {
253- LOG .info ("RegionServerReport ignored, could not record the server: " + sn );
254- return ; // Not recorded, so no need to move on
253+ // The master has already registered a server with the same (host + port) and a higher
254+ // startcode. This can happen if a regionserver report comes late from the old server
255+ // (possible race condition); by that time the master has already processed SCP for that
256+ // server and started accepting regionserver reports from the new server, i.e. the server
257+ // with the same (host + port) and a higher startcode.
258+ // The exception thrown here is not meant to tell the region server it is dead, because if
259+ // there is a new server on the same host and port, the old server should already be
260+ // dead in an ideal situation.
261+ // The exception thrown here is to skip the later steps of the whole regionServerReport
262+ // request processing. Usually, after recording it in ServerManager, we will call the
263+ // related methods in AssignmentManager to record region states. If the region server
264+ // is already dead, we should not do these steps anymore, so here we throw an exception
265+ // to let the upper layer know that they should not continue processing anymore.
266+ final String errorMsg = "RegionServerReport received from " + sn
267+ + ", but another server with the same name and higher startcode is already registered,"
268+ + " ignoring" ;
269+ LOG .warn (errorMsg );
270+ throw new YouAreDeadException (errorMsg );
255271 }
256272 }
257273 updateLastFlushedSequenceIds (sn , sl );
0 commit comments