@@ -172,7 +172,8 @@ class DAGScheduler(
 
   // For tracking failed nodes, we use the MapOutputTracker's epoch number, which is sent with
   // every task. When we detect a node failing, we note the current epoch number and failed
-  // executor, increment it for new tasks, and use this to ignore stray ShuffleMapTask results.
+  // executor or host, increment it for new tasks, and use this to ignore stray
+  // ShuffleMapTask results.
   //
   // TODO: Garbage collect information about failure epochs when we know there are no more
   //       stray messages to detect.
@@ -1348,7 +1349,14 @@ class DAGScheduler(
 
           // TODO: mark the executor as failed only if there were lots of fetch failures on it
           if (bmAddress != null) {
-            handleExecutorLost(bmAddress.executorId, filesLost = true, Some(task.epoch))
+            if (env.blockManager.externalShuffleServiceEnabled) {
+              val currentEpoch = Some(task.epoch).getOrElse(mapOutputTracker.getEpoch)
+              removeExecutor(bmAddress.executorId, currentEpoch)
+              handleExternalShuffleFailure(bmAddress.host, currentEpoch)
+            }
+            else {
+              handleExecutorLost(bmAddress.executorId, filesLost = true, Some(task.epoch))
+            }
           }
         }
 
@@ -1368,6 +1376,30 @@ class DAGScheduler(
     }
   }
 
+  /**
+   * Removes an executor from the driver endpoint.
+   *
+   * @param execId id of the executor to be removed
+   * @param currentEpoch epoch during which the executor failure was caught; used to keep
+   *                     stray failure messages from retriggering the detection of an
+   *                     executor as lost.
+   *
+   * @return true if the executor was removed, false otherwise
+   */
+  private[scheduler] def removeExecutor(execId: String, currentEpoch: Long): Boolean = {
+    if (!failedEpoch.contains(execId) || failedEpoch(execId) < currentEpoch) {
+      failedEpoch(execId) = currentEpoch
+      logInfo("Executor lost: %s (epoch %d)".format(execId, currentEpoch))
+      blockManagerMaster.removeExecutor(execId)
+      true
+    }
+    else {
+      logDebug("Additional executor lost message for " + execId +
+        " (epoch " + currentEpoch + ")")
+      false
+    }
+  }
+
   /**
    * Responds to an executor being lost. This is called inside the event loop, so it assumes it can
    * modify the scheduler's internal state. Use executorLost() to post a loss event from outside.
@@ -1385,38 +1417,76 @@ class DAGScheduler(
       filesLost: Boolean,
       maybeEpoch: Option[Long] = None) {
     val currentEpoch = maybeEpoch.getOrElse(mapOutputTracker.getEpoch)
-    if (!failedEpoch.contains(execId) || failedEpoch(execId) < currentEpoch) {
-      failedEpoch(execId) = currentEpoch
-      logInfo("Executor lost: %s (epoch %d)".format(execId, currentEpoch))
-      blockManagerMaster.removeExecutor(execId)
+    val executorRemoved = removeExecutor(execId, currentEpoch)
+    if (executorRemoved && (filesLost || !env.blockManager.externalShuffleServiceEnabled)) {
+      handleInternalShuffleFailure(execId, currentEpoch)
+    }
+  }
 
-      if (filesLost || !env.blockManager.externalShuffleServiceEnabled) {
-        logInfo("Shuffle files lost for executor: %s (epoch %d)".format(execId, currentEpoch))
-        // TODO: This will be really slow if we keep accumulating shuffle map stages
-        for ((shuffleId, stage) <- shuffleIdToMapStage) {
-          stage.removeOutputsOnExecutor(execId)
-          mapOutputTracker.registerMapOutputs(
-            shuffleId,
-            stage.outputLocInMapOutputTrackerFormat(),
-            changeEpoch = true)
-        }
-        if (shuffleIdToMapStage.isEmpty) {
-          mapOutputTracker.incrementEpoch()
-        }
-        clearCacheLocs()
-      }
+  /**
+   * Responds to an internal shuffle becoming unavailable for an executor.
+   *
+   * We will assume that we've lost all the shuffle blocks for the executor.
+   *
+   * @param execId id of the executor whose internal shuffle output is unavailable
+   * @param currentEpoch epoch during which the failure was caught.
+   */
+  private[scheduler] def handleInternalShuffleFailure(execId: String, currentEpoch: Long): Unit = {
+    logInfo("Shuffle files lost for executor: %s (epoch %d)".format(execId, currentEpoch))
+    cleanShuffleOutputs((stage: ShuffleMapStage) => {
+      stage.removeOutputsOnExecutor(execId)
+    })
+  }
+
+  /**
+   * Responds to an external shuffle service becoming unavailable on a host.
+   *
+   * We will assume that we've lost all the shuffle blocks on that host if a FetchFailed occurred
+   * while the external shuffle service is being used.
+   *
+   * @param host address of the host on which the external shuffle service is unavailable
+   * @param currentEpoch epoch during which the failure was caught; used to keep stray fetch
+   *                     failures from retriggering the detection of the external shuffle
+   *                     service becoming unavailable.
+   */
+  private[scheduler] def handleExternalShuffleFailure(host: String, currentEpoch: Long): Unit = {
+    if (!failedEpoch.contains(host) || failedEpoch(host) < currentEpoch) {
+      failedEpoch(host) = currentEpoch
+      logInfo("Shuffle files lost for host: %s (epoch %d)".format(host, currentEpoch))
+      cleanShuffleOutputs((stage: ShuffleMapStage) => {
+        stage.removeOutputsOnHost(host)
+      })
     } else {
-      logDebug("Additional executor lost message for " + execId +
-               " (epoch " + currentEpoch + ")")
+      logDebug(("Additional Shuffle files " +
+        "lost message for host: %s (epoch %d)").format(host, currentEpoch))
+    }
+  }
+
+  private[scheduler] def cleanShuffleOutputs(outputsCleaner: ShuffleMapStage => Unit): Unit = {
+    // TODO: This will be really slow if we keep accumulating shuffle map stages
+    for ((shuffleId, stage) <- shuffleIdToMapStage) {
+      outputsCleaner(stage)
+      mapOutputTracker.registerMapOutputs(
+        shuffleId,
+        stage.outputLocInMapOutputTrackerFormat(),
+        changeEpoch = true)
+    }
+    if (shuffleIdToMapStage.isEmpty) {
+      mapOutputTracker.incrementEpoch()
     }
+    clearCacheLocs()
   }
 
   private[scheduler] def handleExecutorAdded(execId: String, host: String) {
     // remove from failedEpoch(execId) ?
     if (failedEpoch.contains(execId)) {
-      logInfo("Host added was in lost list earlier: " + host)
+      logInfo("Executor %s added was in lost list earlier.".format(execId))
       failedEpoch -= execId
     }
+
+    if (failedEpoch.contains(host)) {
+      failedEpoch -= host
+    }
   }
 
   private[scheduler] def handleStageCancellation(stageId: Int, reason: Option[String]) {
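
For reference, here is a minimal standalone sketch of the epoch guard that both removeExecutor and handleExternalShuffleFailure apply before cleaning shuffle outputs (the same check the old handleExecutorLost performed inline). The object, map, and method names below are illustrative assumptions for this sketch, not DAGScheduler internals.

// Illustrative sketch only (not Spark source): keeping the newest handled epoch
// per executor id or host filters out stray, late failure reports.
import scala.collection.mutable

object EpochGuardSketch {
  private val failedEpoch = new mutable.HashMap[String, Long]

  // Returns true only for the first report seen at a strictly newer epoch than
  // the last one we handled for this key.
  def shouldHandle(key: String, epoch: Long): Boolean = {
    if (!failedEpoch.contains(key) || failedEpoch(key) < epoch) {
      failedEpoch(key) = epoch   // remember the epoch we acted on
      true                       // first report: clean up shuffle outputs
    } else {
      false                      // stray or duplicate report: ignore
    }
  }

  def main(args: Array[String]): Unit = {
    println(shouldHandle("host-1", 3))  // true  -> handle the failure
    println(shouldHandle("host-1", 3))  // false -> stray report for the same epoch
    println(shouldHandle("host-1", 5))  // true  -> newer epoch, handle again
  }
}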