Skip to content

Commit 80c1f56

Browse files
committed
HBASE-28582 ModifyTableProcedure should not reset TRSP on region node when closing unused region replicas
1 parent 23fa363 commit 80c1f56

File tree

4 files changed

+210
-9
lines changed

4 files changed

+210
-9
lines changed

hbase-protocol-shaded/src/main/protobuf/server/master/MasterProcedure.proto

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -780,3 +780,13 @@ enum MigrateNamespaceTableProcedureState {
780780

781781
message MigrateNamespaceTableProcedureStateData {
782782
}
783+
784+
// States of CloseExcessRegionReplicasProcedure. SCHEDULE is the initial state, in which unassign
// procedures are submitted for the excess region replicas; in CONFIRM the procedure checks
// whether any excess region replica is still unclosed.
enum CloseExcessRegionReplicasProcedureState {
785+
CLOSE_EXCESS_REGION_REPLICAS_SCHEDULE = 1;
786+
CLOSE_EXCESS_REGION_REPLICAS_CONFIRM = 2;
787+
}
788+
789+
// Persisted state of CloseExcessRegionReplicasProcedure: the table being modified and the new
// replica count. Region replicas whose replica id is >= new_replica_count are the excess ones.
message CloseExcessRegionReplicasProcedureStateData {
790+
required TableName table_name = 1;
791+
required uint32 new_replica_count = 2;
792+
}

hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java

Lines changed: 49 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
import java.util.concurrent.atomic.AtomicBoolean;
3434
import java.util.concurrent.locks.Condition;
3535
import java.util.concurrent.locks.ReentrantLock;
36+
import java.util.function.Consumer;
3637
import java.util.stream.Collectors;
3738
import java.util.stream.Stream;
3839
import org.apache.hadoop.conf.Configuration;
@@ -1084,14 +1085,55 @@ public TransitRegionStateProcedure[] createUnassignProceduresForDisabling(TableN
10841085
}
10851086

10861087
/**
1087-
* Called by ModifyTableProcedures to unassign all the excess region replicas for a table.
1088+
* Called by ModifyTableProcedure to unassign all the excess region replicas for a table. Skips
1089+
* submitting an unassign procedure for regions that are in transition, so you may need to call
1090+
* this method multiple times.
1091+
* @param tableName the table for closing excess region replicas
1092+
* @param newReplicaCount the new replica count, should be less than current replica count
1093+
* @param submit for submitting procedure
1094+
* @return the number of regions in transition for which no unassign procedure was scheduled
10881095
*/
1089-
public TransitRegionStateProcedure[] createUnassignProceduresForClosingExcessRegionReplicas(
1090-
TableName tableName, int newReplicaCount) {
1091-
return regionStates.getTableRegionStateNodes(tableName).stream()
1092-
.filter(regionNode -> regionNode.getRegionInfo().getReplicaId() >= newReplicaCount)
1093-
.map(this::forceCreateUnssignProcedure).filter(p -> p != null)
1094-
.toArray(TransitRegionStateProcedure[]::new);
1096+
public int submitUnassignProcedureForClosingExcessRegionReplicas(TableName tableName,
1097+
int newReplicaCount, Consumer<TransitRegionStateProcedure> submit) {
1098+
int inTransitionCount = 0;
1099+
for (RegionStateNode regionNode : regionStates.getTableRegionStateNodes(tableName)) {
1100+
regionNode.lock();
1101+
try {
1102+
if (regionNode.getRegionInfo().getReplicaId() >= newReplicaCount) {
1103+
if (regionNode.isInTransition()) {
1104+
LOG.debug("skip scheduling unassign procedure for {} when closing excess region "
1105+
+ "replicas since it is in transition", regionNode);
1106+
inTransitionCount++;
1107+
continue;
1108+
}
1109+
if (regionNode.isInState(State.OFFLINE, State.CLOSED, State.SPLIT)) {
1110+
continue;
1111+
}
1112+
submit.accept(regionNode.setProcedure(TransitRegionStateProcedure
1113+
.unassign(getProcedureEnvironment(), regionNode.getRegionInfo())));
1114+
}
1115+
} finally {
1116+
regionNode.unlock();
1117+
}
1118+
}
1119+
return inTransitionCount;
1120+
}
1121+
1122+
public int numberOfUnclosedExcessRegionReplicas(TableName tableName, int newReplicaCount) {
1123+
int unclosed = 0;
1124+
for (RegionStateNode regionNode : regionStates.getTableRegionStateNodes(tableName)) {
1125+
regionNode.lock();
1126+
try {
1127+
if (regionNode.getRegionInfo().getReplicaId() >= newReplicaCount) {
1128+
if (!regionNode.isInState(State.OFFLINE, State.CLOSED, State.SPLIT)) {
1129+
unclosed++;
1130+
}
1131+
}
1132+
} finally {
1133+
regionNode.unlock();
1134+
}
1135+
}
1136+
return unclosed;
10951137
}
10961138

10971139
public SplitTableRegionProcedure createSplitProcedure(final RegionInfo regionToSplit,
Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package org.apache.hadoop.hbase.master.procedure;
19+
20+
import java.io.IOException;
21+
import org.apache.commons.lang3.mutable.MutableBoolean;
22+
import org.apache.hadoop.hbase.TableName;
23+
import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
24+
import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
25+
import org.apache.hadoop.hbase.procedure2.ProcedureUtil;
26+
import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
27+
import org.apache.hadoop.hbase.util.RetryCounter;
28+
import org.apache.yetus.audience.InterfaceAudience;
29+
import org.slf4j.Logger;
30+
import org.slf4j.LoggerFactory;
31+
32+
import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
33+
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.CloseExcessRegionReplicasProcedureState;
34+
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.CloseExcessRegionReplicasProcedureStateData;
35+
36+
/**
37+
* Procedure for close excess region replicas.
38+
*/
39+
@InterfaceAudience.Private
40+
public class CloseExcessRegionReplicasProcedure
41+
extends AbstractStateMachineTableProcedure<CloseExcessRegionReplicasProcedureState> {
42+
43+
private static final Logger LOG =
44+
LoggerFactory.getLogger(CloseExcessRegionReplicasProcedure.class);
45+
46+
private TableName tableName;
47+
private int newReplicaCount;
48+
49+
private RetryCounter retryCounter;
50+
51+
public CloseExcessRegionReplicasProcedure() {
52+
}
53+
54+
public CloseExcessRegionReplicasProcedure(TableName tableName, int newReplicaCount) {
55+
this.tableName = tableName;
56+
this.newReplicaCount = newReplicaCount;
57+
}
58+
59+
@Override
60+
public TableName getTableName() {
61+
return tableName;
62+
}
63+
64+
@Override
65+
public TableOperationType getTableOperationType() {
66+
return TableOperationType.REGION_EDIT;
67+
}
68+
69+
@Override
70+
protected Flow executeFromState(MasterProcedureEnv env,
71+
CloseExcessRegionReplicasProcedureState state)
72+
throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException {
73+
LOG.trace("{} execute state={}", this, state);
74+
switch (state) {
75+
case CLOSE_EXCESS_REGION_REPLICAS_SCHEDULE:
76+
MutableBoolean submitted = new MutableBoolean(false);
77+
int inTransitionCount = env.getAssignmentManager()
78+
.submitUnassignProcedureForClosingExcessRegionReplicas(tableName, newReplicaCount, p -> {
79+
submitted.setTrue();
80+
addChildProcedure(p);
81+
});
82+
if (inTransitionCount > 0 && submitted.isFalse()) {
83+
// we haven't scheduled any unassign procedures and there are still regions in
84+
// transition, sleep for a while and try again
85+
if (retryCounter == null) {
86+
retryCounter = ProcedureUtil.createRetryCounter(env.getMasterConfiguration());
87+
}
88+
long backoffMillis = retryCounter.getBackoffTimeAndIncrementAttempts();
89+
LOG.info(
90+
"There are still {} region(s) in transition for table {} when closing excess"
91+
+ " region replicas, suspend {}secs and try again later",
92+
inTransitionCount, tableName, backoffMillis / 1000);
93+
suspend(inTransitionCount, true);
94+
}
95+
setNextState(CloseExcessRegionReplicasProcedureState.CLOSE_EXCESS_REGION_REPLICAS_CONFIRM);
96+
return Flow.HAS_MORE_STATE;
97+
case CLOSE_EXCESS_REGION_REPLICAS_CONFIRM:
98+
int unclosedCount = env.getAssignmentManager()
99+
.numberOfUnclosedExcessRegionReplicas(tableName, newReplicaCount);
100+
if (unclosedCount > 0) {
101+
LOG.info("There are still {} unclosed region(s) for table {} when closing excess"
102+
+ " region replicas, continue...");
103+
setNextState(
104+
CloseExcessRegionReplicasProcedureState.CLOSE_EXCESS_REGION_REPLICAS_SCHEDULE);
105+
} else {
106+
return Flow.NO_MORE_STATE;
107+
}
108+
default:
109+
throw new UnsupportedOperationException("unhandled state=" + state);
110+
}
111+
}
112+
113+
@Override
114+
protected void rollbackState(MasterProcedureEnv env,
115+
CloseExcessRegionReplicasProcedureState state) throws IOException, InterruptedException {
116+
throw new UnsupportedOperationException();
117+
}
118+
119+
@Override
120+
protected CloseExcessRegionReplicasProcedureState getState(int stateId) {
121+
return CloseExcessRegionReplicasProcedureState.forNumber(stateId);
122+
}
123+
124+
@Override
125+
protected int getStateId(CloseExcessRegionReplicasProcedureState state) {
126+
return state.getNumber();
127+
}
128+
129+
@Override
130+
protected CloseExcessRegionReplicasProcedureState getInitialState() {
131+
return CloseExcessRegionReplicasProcedureState.CLOSE_EXCESS_REGION_REPLICAS_SCHEDULE;
132+
}
133+
134+
@Override
135+
protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException {
136+
CloseExcessRegionReplicasProcedureStateData data = CloseExcessRegionReplicasProcedureStateData
137+
.newBuilder().setTableName(ProtobufUtil.toProtoTableName(tableName))
138+
.setNewReplicaCount(newReplicaCount).build();
139+
serializer.serialize(data);
140+
}
141+
142+
@Override
143+
protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException {
144+
CloseExcessRegionReplicasProcedureStateData data =
145+
serializer.deserialize(CloseExcessRegionReplicasProcedureStateData.class);
146+
tableName = ProtobufUtil.toTableName(data.getTableName());
147+
newReplicaCount = data.getNewReplicaCount();
148+
}
149+
150+
}

hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ModifyTableProcedure.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -525,8 +525,7 @@ private void closeExcessReplicasIfNeeded(MasterProcedureEnv env) {
525525
if (newReplicaCount >= oldReplicaCount) {
526526
return;
527527
}
528-
addChildProcedure(env.getAssignmentManager()
529-
.createUnassignProceduresForClosingExcessRegionReplicas(getTableName(), newReplicaCount));
528+
addChildProcedure(new CloseExcessRegionReplicasProcedure(getTableName(), newReplicaCount));
530529
}
531530

532531
/**

0 commit comments

Comments
 (0)