@@ -20,6 +20,7 @@
 import static org.apache.hadoop.hdfs.DFSConfigKeys.*;
 import static org.apache.hadoop.hdfs.protocol.BlockType.CONTIGUOUS;
 import static org.apache.hadoop.hdfs.protocol.BlockType.STRIPED;
+import static org.apache.hadoop.hdfs.server.blockmanagement.LowRedundancyBlocks.LEVEL;
 import static org.apache.hadoop.util.ExitUtil.terminate;
 import static org.apache.hadoop.util.Time.now;
 
@@ -869,7 +870,7 @@ public void metaSave(PrintWriter out) {
     synchronized (neededReconstruction) {
       out.println("Metasave: Blocks waiting for reconstruction: "
           + neededReconstruction.getLowRedundancyBlockCount());
-      for (int i = 0; i < neededReconstruction.LEVEL; i++) {
+      for (int i = 0; i < LEVEL; i++) {
         if (i != neededReconstruction.QUEUE_WITH_CORRUPT_BLOCKS) {
           for (Iterator<BlockInfo> it = neededReconstruction.iterator(i);
               it.hasNext();) {
@@ -969,7 +970,7 @@ private void dumpBlockMeta(Block block, PrintWriter out) {
     // source node returned is not used
     chooseSourceDatanodes(blockInfo, containingNodes,
         containingLiveReplicasNodes, numReplicas, new ArrayList<Byte>(),
-        new ArrayList<Byte>(), new ArrayList<Byte>(), LowRedundancyBlocks.LEVEL);
+        new ArrayList<Byte>(), new ArrayList<Byte>(), LEVEL);
 
     // containingLiveReplicasNodes can include READ_ONLY_SHARED replicas which are
     // not included in the numReplicas.liveReplicas() count
@@ -2099,28 +2100,22 @@ int computeInvalidateWork(int nodesToProcess) {
    * @return number of blocks scheduled for reconstruction during this
    *         iteration.
    */
-  int computeBlockReconstructionWork(int blocksToProcess) {
+  int scheduleBlockReconstructionWork(int blocksToProcess) {
     List<List<BlockInfo>> blocksToReconstruct = null;
     namesystem.writeLock();
-    try {
-      boolean reset = false;
-      if (replQueueResetToHeadThreshold > 0) {
-        if (replQueueCallsSinceReset >= replQueueResetToHeadThreshold) {
-          reset = true;
-          replQueueCallsSinceReset = 0;
-        } else {
-          replQueueCallsSinceReset++;
-        }
+    boolean reset = false;
+    if (replQueueResetToHeadThreshold > 0) {
+      if (replQueueCallsSinceReset >= replQueueResetToHeadThreshold) {
+        reset = true;
+        replQueueCallsSinceReset = 0;
+      } else {
+        replQueueCallsSinceReset++;
       }
-      // Choose the blocks to be reconstructed
-      blocksToReconstruct = neededReconstruction
-          .chooseLowRedundancyBlocks(blocksToProcess, reset);
-    } finally {
-      namesystem.writeUnlock("computeBlockReconstructionWork");
     }
-    return computeReconstructionWorkForBlocks(blocksToReconstruct);
+    return scheduleReconstructionWorkForBlocks(blocksToProcess, reset);
   }
 
+
   /**
    * Reconstruct a set of blocks to full strength through replication or
    * erasure coding
@@ -2129,29 +2124,39 @@ int computeBlockReconstructionWork(int blocksToProcess) {
    * @return the number of blocks scheduled for replication
    */
   @VisibleForTesting
-  int computeReconstructionWorkForBlocks(
-      List<List<BlockInfo>> blocksToReconstruct) {
+  int scheduleReconstructionWorkForBlocks(int blocksToProcess, boolean resetIterators) {
     int scheduledWork = 0;
     List<BlockReconstructionWork> reconWork = new ArrayList<>();
-
-    // Step 1: categorize at-risk blocks into replication and EC tasks
     namesystem.writeLock();
+    int priority = 0;
+    // Step 1: categorize at-risk blocks into replication and EC tasks
     try {
       synchronized (neededReconstruction) {
-        for (int priority = 0; priority < blocksToReconstruct
-            .size(); priority++) {
-          for (BlockInfo block : blocksToReconstruct.get(priority)) {
-            BlockReconstructionWork rw = scheduleReconstruction(block,
-                priority);
+        for (; blocksToProcess > 0 && priority < LEVEL; priority++) {
+          List<BlockInfo> blocks = new ArrayList<>();
+          int processed = neededReconstruction.
+              chooseLowRedundancyBlocksForPriority(priority, blocksToProcess, blocks);
+          if (processed == 0)
+            break;
+          for (BlockInfo block : blocks) {
+            BlockReconstructionWork rw = generateReconstructionForBlock(block,
+                priority);
             if (rw != null) {
               reconWork.add(rw);
+              // if we constructed effective work, reduce the budget
+              blocksToProcess--;
             }
           }
         }
       }
     } finally {
-      namesystem.writeUnlock("computeReconstructionWorkForBlocks");
+      namesystem.writeUnlock("scheduleReconstructionWorkForBlocks");
     }
+    if (priority == LEVEL || resetIterators) {
+      // Reset all bookmarks because there were no recently added blocks.
+      neededReconstruction.resetIterators();
+    }
+
 
     // Step 2: choose target nodes for each reconstruction task
     for (BlockReconstructionWork rw : reconWork) {
@@ -2161,7 +2166,7 @@ int computeReconstructionWorkForBlocks(
 
       // Exclude all nodes which already exists as targets for the block
       List<DatanodeStorageInfo> targets =
-        pendingReconstruction.getTargets(rw.getBlock());
+          pendingReconstruction.getTargets(rw.getBlock());
       if (targets != null) {
         for (DatanodeStorageInfo dn : targets) {
           excludedNodes.add(dn.getDatanodeDescriptor());
@@ -2170,7 +2175,7 @@ int computeReconstructionWorkForBlocks(
 
       // choose replication targets: NOT HOLDING THE GLOBAL LOCK
       final BlockPlacementPolicy placementPolicy =
-        placementPolicies.getPolicy(rw.getBlock().getBlockType());
+          placementPolicies.getPolicy(rw.getBlock().getBlockType());
       rw.chooseTargets(placementPolicy, storagePolicySuite, excludedNodes);
     }
 
@@ -2191,7 +2196,7 @@ int computeReconstructionWorkForBlocks(
         }
       }
     } finally {
-      namesystem.writeUnlock("computeReconstructionWorkForBlocks");
+      namesystem.writeUnlock("scheduleReconstructionWorkForBlocks");
     }
 
     if (blockLog.isDebugEnabled()) {
@@ -2204,16 +2209,17 @@ int computeReconstructionWorkForBlocks(
             targetList.append(' ').append(target.getDatanodeDescriptor());
           }
           blockLog.debug("BLOCK* ask {} to replicate {} to {}",
-            rw.getSrcNodes(), rw.getBlock(), targetList);
+              rw.getSrcNodes(), rw.getBlock(), targetList);
         }
       }
       blockLog.debug("BLOCK* neededReconstruction = {} pendingReconstruction = {}",
-        neededReconstruction.size(), pendingReconstruction.size());
+          neededReconstruction.size(), pendingReconstruction.size());
     }
 
     return scheduledWork;
   }
 
+
   // Check if the number of live + pending replicas satisfies
   // the expected redundancy.
   boolean hasEnoughEffectiveReplicas(BlockInfo block,
@@ -2225,7 +2231,7 @@ boolean hasEnoughEffectiveReplicas(BlockInfo block,
   }
 
   @VisibleForTesting
-  BlockReconstructionWork scheduleReconstruction(BlockInfo block,
+  BlockReconstructionWork generateReconstructionForBlock(BlockInfo block,
       int priority) {
     // skip abandoned block or block reopened for append
     if (block.isDeleted() || !block.isCompleteOrCommitted()) {
@@ -2615,7 +2621,7 @@ DatanodeDescriptor[] chooseSourceDatanodes(BlockInfo block,
     }
 
     // for EC here need to make sure the numReplicas replicates state correct
-    // because in the scheduleReconstruction it need the numReplicas to check
+    // because in the generateReconstructionForBlock it need the numReplicas to check
     // whether need to reconstruct the ec internal block
     byte blockIndex = -1;
     if (isStriped) {
@@ -4954,7 +4960,7 @@ public void removeBlock(BlockInfo block) {
       DatanodeStorageInfo.decrementBlocksScheduled(remove.getTargets()
           .toArray(new DatanodeStorageInfo[remove.getTargets().size()]));
     }
-    neededReconstruction.remove(block, LowRedundancyBlocks.LEVEL);
+    neededReconstruction.remove(block, LEVEL);
     postponedMisreplicatedBlocks.remove(block);
   }
 
@@ -5405,7 +5411,7 @@ int computeDatanodeWork() {
     final int nodesToProcess = (int) Math.ceil(numlive
         * this.blocksInvalidateWorkPct);
 
-    int workFound = this.computeBlockReconstructionWork(blocksToProcess);
+    int workFound = this.scheduleBlockReconstructionWork(blocksToProcess);
 
     // Update counters
     namesystem.writeLock();
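
The core of this change is the budgeted, priority-ordered selection loop in scheduleReconstructionWorkForBlocks: instead of having LowRedundancyBlocks build lists for every priority up front, the scheduler pulls blocks one priority level at a time, charges a per-iteration budget as work is generated, and resets the per-level bookmarks once a full pass has been made (or a reset is requested). The standalone sketch below illustrates that control flow only; PriorityQueues, chooseForPriority, resetBookmarks and schedule are invented stand-ins, not the HDFS classes, and unlike the patch the sketch moves on to the next level when one is exhausted rather than breaking out of the loop.

// PrioritySchedulingSketch.java -- simplified stand-ins, not the HDFS code.
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class PrioritySchedulingSketch {

  /** Stand-in for LowRedundancyBlocks: one bookmarked queue per priority level. */
  static class PriorityQueues {
    static final int LEVEL = 5;                      // number of priority levels
    private final List<List<String>> queues = new ArrayList<>();
    private final int[] bookmarks = new int[LEVEL];

    PriorityQueues() {
      for (int i = 0; i < LEVEL; i++) {
        queues.add(new ArrayList<String>());
      }
    }

    void add(int priority, String block) {
      queues.get(priority).add(block);
    }

    /**
     * Copy up to quota blocks of one priority into out, starting at that
     * level's bookmark, and return how many were taken.
     */
    int chooseForPriority(int priority, int quota, List<String> out) {
      List<String> queue = queues.get(priority);
      int taken = 0;
      while (taken < quota && bookmarks[priority] < queue.size()) {
        out.add(queue.get(bookmarks[priority]++));
        taken++;
      }
      return taken;
    }

    /** Restart every level from its head. */
    void resetBookmarks() {
      Arrays.fill(bookmarks, 0);
    }
  }

  /**
   * Budgeted, priority-ordered selection: drain levels in order until the
   * per-iteration budget is spent, then reset the bookmarks once every level
   * has been visited (or the caller asked for a reset).
   */
  static int schedule(PriorityQueues needed, int blocksToProcess, boolean resetAfter) {
    int scheduled = 0;
    int priority = 0;
    for (; blocksToProcess > 0 && priority < PriorityQueues.LEVEL; priority++) {
      List<String> blocks = new ArrayList<>();
      int processed = needed.chooseForPriority(priority, blocksToProcess, blocks);
      if (processed == 0) {
        continue;  // nothing left at this level; the patch breaks out here instead
      }
      for (String block : blocks) {
        // The real code calls generateReconstructionForBlock(block, priority) and
        // only charges the budget when it returns non-null work; the sketch
        // assumes every block produces schedulable work.
        scheduled++;
        blocksToProcess--;
      }
    }
    if (priority == PriorityQueues.LEVEL || resetAfter) {
      needed.resetBookmarks();
    }
    return scheduled;
  }

  public static void main(String[] args) {
    PriorityQueues needed = new PriorityQueues();
    needed.add(0, "blk_1");
    needed.add(0, "blk_2");
    needed.add(1, "blk_3");
    System.out.println(schedule(needed, 2, false));  // 2: both priority-0 blocks
    System.out.println(schedule(needed, 2, false));  // 1: the bookmark skips ahead to blk_3
  }
}

Keeping per-level bookmarks lets successive scheduler passes make progress through the backlog instead of rescanning the same head-of-queue blocks, while the periodic reset (driven here by resetAfter, and in the patch by the replQueueResetToHeadThreshold counter) guards against blocks sitting behind a bookmark being starved.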