Skip to content

Commit 7909e29

Browse files
authored
HBASE-24760 Add a config hbase.rsgroup.fallback.enable for RSGroup fallback feature (#2149)
Signed-off-by: Guanghao Zhang <[email protected]>
1 parent 047e061 commit 7909e29

File tree

3 files changed

+101 additions, -109 deletions

hbase-server/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupBasedLoadBalancer.java

Lines changed: 56 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,8 @@
1919

2020
import java.io.IOException;
2121
import java.util.ArrayList;
22-
import java.util.Collection;
2322
import java.util.Collections;
2423
import java.util.HashMap;
25-
import java.util.HashSet;
2624
import java.util.List;
2725
import java.util.Map;
2826
import java.util.Set;
@@ -51,7 +49,6 @@
5149

5250
import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
5351
import org.apache.hbase.thirdparty.com.google.common.collect.ArrayListMultimap;
54-
import org.apache.hbase.thirdparty.com.google.common.collect.LinkedListMultimap;
5552
import org.apache.hbase.thirdparty.com.google.common.collect.ListMultimap;
5653
import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
5754
import org.apache.hbase.thirdparty.com.google.common.collect.Maps;
@@ -81,15 +78,15 @@ public class RSGroupBasedLoadBalancer implements LoadBalancer {
8178
private LoadBalancer internalBalancer;
8279

8380
/**
84-
* Define the config key of fallback groups
85-
* Enabled only if this property is set
81+
* Set this key to {@code true} to allow region fallback.
82+
* Fallback to the default rsgroup first, then fallback to any group if no online servers in
83+
* default rsgroup.
8684
* Please keep balancer switch on at the same time, which is relied on to correct misplaced
8785
* regions
8886
*/
89-
public static final String FALLBACK_GROUPS_KEY = "hbase.rsgroup.fallback.groups";
87+
public static final String FALLBACK_GROUP_ENABLE_KEY = "hbase.rsgroup.fallback.enable";
9088

9189
private boolean fallbackEnabled = false;
92-
private Set<String> fallbackGroups;
9390

9491
/**
9592
* Used by reflection in {@link org.apache.hadoop.hbase.master.balancer.LoadBalancerFactory}.
@@ -180,22 +177,14 @@ public List<RegionPlan> balanceCluster(
180177
public Map<ServerName, List<RegionInfo>> roundRobinAssignment(
181178
List<RegionInfo> regions, List<ServerName> servers) throws IOException {
182179
Map<ServerName, List<RegionInfo>> assignments = Maps.newHashMap();
183-
ListMultimap<String, RegionInfo> regionMap = ArrayListMultimap.create();
184-
ListMultimap<String, ServerName> serverMap = ArrayListMultimap.create();
185-
generateGroupMaps(regions, servers, regionMap, serverMap);
186-
for (String groupKey : regionMap.keySet()) {
187-
if (regionMap.get(groupKey).size() > 0) {
188-
Map<ServerName, List<RegionInfo>> result = this.internalBalancer
189-
.roundRobinAssignment(regionMap.get(groupKey), serverMap.get(groupKey));
190-
if (result != null) {
191-
if (result.containsKey(LoadBalancer.BOGUS_SERVER_NAME) &&
192-
assignments.containsKey(LoadBalancer.BOGUS_SERVER_NAME)) {
193-
assignments.get(LoadBalancer.BOGUS_SERVER_NAME)
194-
.addAll(result.get(LoadBalancer.BOGUS_SERVER_NAME));
195-
} else {
196-
assignments.putAll(result);
197-
}
198-
}
180+
List<Pair<List<RegionInfo>, List<ServerName>>> pairs =
181+
generateGroupAssignments(regions, servers);
182+
for (Pair<List<RegionInfo>, List<ServerName>> pair : pairs) {
183+
Map<ServerName, List<RegionInfo>> result = this.internalBalancer
184+
.roundRobinAssignment(pair.getFirst(), pair.getSecond());
185+
if (result != null) {
186+
result.forEach((server, regionInfos) ->
187+
assignments.computeIfAbsent(server, s -> Lists.newArrayList()).addAll(regionInfos));
199188
}
200189
}
201190
return assignments;
@@ -206,36 +195,16 @@ public Map<ServerName, List<RegionInfo>> retainAssignment(Map<RegionInfo, Server
206195
List<ServerName> servers) throws HBaseIOException {
207196
try {
208197
Map<ServerName, List<RegionInfo>> assignments = new TreeMap<>();
209-
ListMultimap<String, RegionInfo> groupToRegion = ArrayListMultimap.create();
210-
RSGroupInfo defaultInfo = rsGroupInfoManager.getRSGroup(RSGroupInfo.DEFAULT_GROUP);
211-
for (RegionInfo region : regions.keySet()) {
212-
String groupName =
213-
RSGroupUtil.getRSGroupInfo(masterServices, rsGroupInfoManager, region.getTable())
214-
.orElse(defaultInfo).getName();
215-
groupToRegion.put(groupName, region);
216-
}
217-
for (String group : groupToRegion.keySet()) {
218-
Map<RegionInfo, ServerName> currentAssignmentMap = new TreeMap<RegionInfo, ServerName>();
219-
List<RegionInfo> regionList = groupToRegion.get(group);
220-
RSGroupInfo info = rsGroupInfoManager.getRSGroup(group);
221-
List<ServerName> candidateList = filterOfflineServers(info, servers);
222-
if (fallbackEnabled && candidateList.isEmpty()) {
223-
candidateList = getFallBackCandidates(servers);
224-
}
225-
for (RegionInfo region : regionList) {
226-
currentAssignmentMap.put(region, regions.get(region));
227-
}
228-
if (candidateList.size() > 0) {
229-
assignments
230-
.putAll(this.internalBalancer.retainAssignment(currentAssignmentMap, candidateList));
231-
} else {
232-
if (LOG.isDebugEnabled()) {
233-
LOG.debug("No available servers for group {} to assign regions: {}", group,
234-
RegionInfo.getShortNameToLog(regionList));
235-
}
236-
assignments.computeIfAbsent(LoadBalancer.BOGUS_SERVER_NAME, s -> new ArrayList<>())
237-
.addAll(regionList);
238-
}
198+
List<Pair<List<RegionInfo>, List<ServerName>>> pairs =
199+
generateGroupAssignments(Lists.newArrayList(regions.keySet()), servers);
200+
for (Pair<List<RegionInfo>, List<ServerName>> pair : pairs) {
201+
List<RegionInfo> regionList = pair.getFirst();
202+
Map<RegionInfo, ServerName> currentAssignmentMap = Maps.newTreeMap();
203+
regionList.forEach(r -> currentAssignmentMap.put(r, regions.get(r)));
204+
Map<ServerName, List<RegionInfo>> pairResult =
205+
this.internalBalancer.retainAssignment(currentAssignmentMap, pair.getSecond());
206+
pairResult.forEach((server, rs) ->
207+
assignments.computeIfAbsent(server, s -> Lists.newArrayList()).addAll(rs));
239208
}
240209
return assignments;
241210
} catch (IOException e) {
@@ -246,17 +215,17 @@ public Map<ServerName, List<RegionInfo>> retainAssignment(Map<RegionInfo, Server
246215
@Override
247216
public ServerName randomAssignment(RegionInfo region,
248217
List<ServerName> servers) throws IOException {
249-
ListMultimap<String,RegionInfo> regionMap = LinkedListMultimap.create();
250-
ListMultimap<String,ServerName> serverMap = LinkedListMultimap.create();
251-
generateGroupMaps(Lists.newArrayList(region), servers, regionMap, serverMap);
252-
List<ServerName> filteredServers = serverMap.get(regionMap.keySet().iterator().next());
218+
List<Pair<List<RegionInfo>, List<ServerName>>> pairs =
219+
generateGroupAssignments(Lists.newArrayList(region), servers);
220+
List<ServerName> filteredServers = pairs.iterator().next().getSecond();
253221
return this.internalBalancer.randomAssignment(region, filteredServers);
254222
}
255223

256-
private void generateGroupMaps(List<RegionInfo> regions, List<ServerName> servers,
257-
ListMultimap<String, RegionInfo> regionMap, ListMultimap<String, ServerName> serverMap)
258-
throws HBaseIOException {
224+
private List<Pair<List<RegionInfo>, List<ServerName>>> generateGroupAssignments(
225+
List<RegionInfo> regions, List<ServerName> servers) throws HBaseIOException {
259226
try {
227+
ListMultimap<String, RegionInfo> regionMap = ArrayListMultimap.create();
228+
ListMultimap<String, ServerName> serverMap = ArrayListMultimap.create();
260229
RSGroupInfo defaultInfo = rsGroupInfoManager.getRSGroup(RSGroupInfo.DEFAULT_GROUP);
261230
for (RegionInfo region : regions) {
262231
String groupName =
@@ -267,15 +236,29 @@ private void generateGroupMaps(List<RegionInfo> regions, List<ServerName> server
267236
for (String groupKey : regionMap.keySet()) {
268237
RSGroupInfo info = rsGroupInfoManager.getRSGroup(groupKey);
269238
serverMap.putAll(groupKey, filterOfflineServers(info, servers));
270-
if (fallbackEnabled && serverMap.get(groupKey).isEmpty()) {
271-
serverMap.putAll(groupKey, getFallBackCandidates(servers));
272-
}
239+
}
240+
241+
List<Pair<List<RegionInfo>, List<ServerName>>> result = Lists.newArrayList();
242+
List<RegionInfo> fallbackRegions = Lists.newArrayList();
243+
for (String groupKey : regionMap.keySet()) {
273244
if (serverMap.get(groupKey).isEmpty()) {
274-
serverMap.put(groupKey, LoadBalancer.BOGUS_SERVER_NAME);
245+
fallbackRegions.addAll(regionMap.get(groupKey));
246+
} else {
247+
result.add(Pair.newPair(regionMap.get(groupKey), serverMap.get(groupKey)));
248+
}
249+
}
250+
if (!fallbackRegions.isEmpty()) {
251+
List<ServerName> candidates = null;
252+
if (fallbackEnabled) {
253+
candidates = getFallBackCandidates(servers);
275254
}
255+
candidates = (candidates == null || candidates.isEmpty()) ?
256+
Lists.newArrayList(BOGUS_SERVER_NAME) : candidates;
257+
result.add(Pair.newPair(fallbackRegions, candidates));
276258
}
259+
return result;
277260
} catch(IOException e) {
278-
throw new HBaseIOException("Failed to generate group maps", e);
261+
throw new HBaseIOException("Failed to generate group assignments", e);
279262
}
280263
}
281264

@@ -390,11 +373,7 @@ public void initialize() throws IOException {
390373
}
391374
internalBalancer.initialize();
392375
// init fallback groups
393-
Collection<String> groups = config.getTrimmedStringCollection(FALLBACK_GROUPS_KEY);
394-
if (groups != null && !groups.isEmpty()) {
395-
this.fallbackEnabled = true;
396-
this.fallbackGroups = new HashSet<>(groups);
397-
}
376+
this.fallbackEnabled = config.getBoolean(FALLBACK_GROUP_ENABLE_KEY, false);
398377
}
399378

400379
public boolean isOnline() {
@@ -485,15 +464,13 @@ public List<RegionPlan> balanceTable(TableName tableName,
485464
}
486465

487466
private List<ServerName> getFallBackCandidates(List<ServerName> servers) {
488-
List<ServerName> serverNames = new ArrayList<>();
489-
for (String fallbackGroup : fallbackGroups) {
490-
try {
491-
RSGroupInfo info = rsGroupInfoManager.getRSGroup(fallbackGroup);
492-
serverNames.addAll(filterOfflineServers(info, servers));
493-
} catch (IOException e) {
494-
LOG.error("Get group info for {} failed", fallbackGroup, e);
495-
}
467+
List<ServerName> serverNames = null;
468+
try {
469+
RSGroupInfo info = rsGroupInfoManager.getRSGroup(RSGroupInfo.DEFAULT_GROUP);
470+
serverNames = filterOfflineServers(info, servers);
471+
} catch (IOException e) {
472+
LOG.error("Failed to get default rsgroup info to fallback", e);
496473
}
497-
return serverNames;
474+
return serverNames == null || serverNames.isEmpty() ? servers : serverNames;
498475
}
499476
}

hbase-server/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroupsBase.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,12 @@ protected final void removeGroup(String groupName) throws IOException {
206206
}
207207
}
208208
ADMIN.setRSGroup(tables, RSGroupInfo.DEFAULT_GROUP);
209+
for (NamespaceDescriptor nd : ADMIN.listNamespaceDescriptors()) {
210+
if (groupName.equals(nd.getConfigurationValue(RSGroupInfo.NAMESPACE_DESC_PROP_GROUP))) {
211+
nd.removeConfiguration(RSGroupInfo.NAMESPACE_DESC_PROP_GROUP);
212+
ADMIN.modifyNamespace(nd);
213+
}
214+
}
209215
RSGroupInfo groupInfo = ADMIN.getRSGroup(groupName);
210216
ADMIN.moveServersToRSGroup(groupInfo.getServers(), RSGroupInfo.DEFAULT_GROUP);
211217
ADMIN.removeRSGroup(groupName);

hbase-server/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroupsFallback.java

Lines changed: 39 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@
2424

2525
import org.apache.hadoop.conf.Configuration;
2626
import org.apache.hadoop.hbase.HBaseClassTestRule;
27+
import org.apache.hadoop.hbase.HBaseTestingUtility;
28+
import org.apache.hadoop.hbase.TableName;
2729
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
2830
import org.apache.hadoop.hbase.client.TableDescriptor;
2931
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
@@ -32,6 +34,7 @@
3234
import org.apache.hadoop.hbase.testclassification.MediumTests;
3335
import org.apache.hadoop.hbase.testclassification.RSGroupTests;
3436
import org.apache.hadoop.hbase.util.Bytes;
37+
import org.apache.hadoop.hbase.util.JVMClusterUtil;
3538
import org.apache.hadoop.hbase.util.Threads;
3639
import org.junit.After;
3740
import org.junit.AfterClass;
@@ -56,8 +59,8 @@ public class TestRSGroupsFallback extends TestRSGroupsBase {
5659

5760
@BeforeClass
5861
public static void setUp() throws Exception {
59-
Configuration configuration = TEST_UTIL.getConfiguration();
60-
configuration.set(RSGroupBasedLoadBalancer.FALLBACK_GROUPS_KEY, FALLBACK_GROUP);
62+
Configuration conf = TEST_UTIL.getConfiguration();
63+
conf.setBoolean(RSGroupBasedLoadBalancer.FALLBACK_GROUP_ENABLE_KEY, true);
6164
setUpTestBeforeClass();
6265
MASTER.balanceSwitch(true);
6366
}
@@ -78,51 +81,57 @@ public void afterMethod() throws Exception {
7881
}
7982

8083
@Test
81-
public void testGroupFallback() throws Exception {
84+
public void testFallback() throws Exception {
8285
// add fallback group
8386
addGroup(FALLBACK_GROUP, 1);
8487
// add test group
8588
String groupName = getGroupName(name.getMethodName());
8689
addGroup(groupName, 1);
8790
TableDescriptor desc = TableDescriptorBuilder.newBuilder(tableName)
88-
.setColumnFamily(ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes("f")).build())
89-
.setRegionServerGroup(groupName)
90-
.build();
91-
ADMIN.createTable(desc);
91+
.setColumnFamily(ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes("f")).build())
92+
.setRegionServerGroup(groupName)
93+
.build();
94+
ADMIN.createTable(desc, HBaseTestingUtility.KEYS_FOR_HBA_CREATE_TABLE);
9295
TEST_UTIL.waitUntilAllRegionsAssigned(tableName);
93-
// server of test group crash
94-
for (Address server : ADMIN.getRSGroup(groupName).getServers()) {
95-
AssignmentTestingUtil.crashRs(TEST_UTIL, getServerName(server), true);
96-
}
97-
Threads.sleep(1000);
98-
TEST_UTIL.waitUntilNoRegionsInTransition(10000);
99-
TEST_UTIL.waitUntilAllRegionsAssigned(tableName);
100-
101-
// regions move to fallback group
102-
assertRegionsInGroup(FALLBACK_GROUP);
96+
// server of test group crash, regions move to default group
97+
crashRsInGroup(groupName);
98+
assertRegionsInGroup(tableName, RSGroupInfo.DEFAULT_GROUP);
10399

104-
// move a new server from default group
105-
Address address = ADMIN.getRSGroup(RSGroupInfo.DEFAULT_GROUP).getServers().first();
106-
ADMIN.moveServersToRSGroup(Collections.singleton(address), groupName);
100+
// server of default group crash, regions move to any other group
101+
crashRsInGroup(RSGroupInfo.DEFAULT_GROUP);
102+
assertRegionsInGroup(tableName, FALLBACK_GROUP);
107103

108-
// correct misplaced regions
104+
// add a new server to default group, regions move to default group
105+
TEST_UTIL.getMiniHBaseCluster().startRegionServerAndWait(60000);
109106
MASTER.balance();
107+
assertRegionsInGroup(tableName, RSGroupInfo.DEFAULT_GROUP);
110108

111-
TEST_UTIL.waitUntilNoRegionsInTransition(10000);
112-
TEST_UTIL.waitUntilAllRegionsAssigned(tableName);
113-
114-
// regions move back
115-
assertRegionsInGroup(groupName);
109+
// add a new server to test group, regions move back
110+
JVMClusterUtil.RegionServerThread t =
111+
TEST_UTIL.getMiniHBaseCluster().startRegionServerAndWait(60000);
112+
ADMIN.moveServersToRSGroup(
113+
Collections.singleton(t.getRegionServer().getServerName().getAddress()), groupName);
114+
MASTER.balance();
115+
assertRegionsInGroup(tableName, groupName);
116116

117117
TEST_UTIL.deleteTable(tableName);
118118
}
119119

120-
private void assertRegionsInGroup(String group) throws IOException {
121-
RSGroupInfo fallbackGroup = ADMIN.getRSGroup(group);
122-
MASTER.getAssignmentManager().getRegionStates().getRegionsOfTable(tableName).forEach(region -> {
120+
private void assertRegionsInGroup(TableName table, String group) throws IOException {
121+
TEST_UTIL.waitUntilAllRegionsAssigned(table);
122+
RSGroupInfo rsGroup = ADMIN.getRSGroup(group);
123+
MASTER.getAssignmentManager().getRegionStates().getRegionsOfTable(table).forEach(region -> {
123124
Address regionOnServer = MASTER.getAssignmentManager().getRegionStates()
124125
.getRegionAssignments().get(region).getAddress();
125-
assertTrue(fallbackGroup.getServers().contains(regionOnServer));
126+
assertTrue(rsGroup.getServers().contains(regionOnServer));
126127
});
127128
}
129+
130+
private void crashRsInGroup(String groupName) throws Exception {
131+
for (Address server : ADMIN.getRSGroup(groupName).getServers()) {
132+
AssignmentTestingUtil.crashRs(TEST_UTIL, getServerName(server), true);
133+
}
134+
Threads.sleep(1000);
135+
TEST_UTIL.waitUntilNoRegionsInTransition(60000);
136+
}
128137
}

0 commit comments

Comments (0)