Skip to content

Commit f99d7a0

Browse files
committed
HBASE-22819 Automatically migrate the rs group config for table after HBASE-22695
1 parent 07fe41d commit f99d7a0

File tree

4 files changed

+214
-15
lines changed

4 files changed

+214
-15
lines changed

hbase-common/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfo.java

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,8 @@
1919
package org.apache.hadoop.hbase.rsgroup;
2020

2121
import java.util.Collection;
22-
import java.util.Set;
2322
import java.util.SortedSet;
2423
import java.util.TreeSet;
25-
2624
import org.apache.hadoop.hbase.TableName;
2725
import org.apache.hadoop.hbase.net.Address;
2826
import org.apache.yetus.audience.InterfaceAudience;
@@ -104,7 +102,7 @@ public boolean containsServer(Address hostPort) {
104102
/**
105103
* Get list of servers.
106104
*/
107-
public Set<Address> getServers() {
105+
public SortedSet<Address> getServers() {
108106
return servers;
109107
}
110108

hbase-server/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManager.java

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -67,11 +67,6 @@ Set<Address> moveServers(Set<Address> servers, String srcGroup, String dstGroup)
6767
*/
6868
List<RSGroupInfo> listRSGroups() throws IOException;
6969

70-
/**
71-
* Refresh/reload the group information from the persistent store
72-
*/
73-
void refresh() throws IOException;
74-
7570
/**
7671
* Whether the manager is able to fully return group metadata
7772
* @return whether the manager is in online mode

hbase-server/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManagerImpl.java

Lines changed: 113 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import java.util.Collections;
2424
import java.util.HashMap;
2525
import java.util.HashSet;
26+
import java.util.Iterator;
2627
import java.util.List;
2728
import java.util.Map;
2829
import java.util.OptionalLong;
@@ -32,6 +33,7 @@
3233
import org.apache.hadoop.conf.Configuration;
3334
import org.apache.hadoop.hbase.Coprocessor;
3435
import org.apache.hadoop.hbase.DoNotRetryIOException;
36+
import org.apache.hadoop.hbase.HConstants;
3537
import org.apache.hadoop.hbase.NamespaceDescriptor;
3638
import org.apache.hadoop.hbase.ServerName;
3739
import org.apache.hadoop.hbase.TableName;
@@ -164,7 +166,7 @@ private RSGroupInfoManagerImpl(MasterServices masterServices) throws IOException
164166

165167

166168
private synchronized void init() throws IOException {
167-
refresh();
169+
refresh(false);
168170
serverEventsListenerThread.start();
169171
masterServices.getServerManager().registerListener(serverEventsListenerThread);
170172
failedOpenUpdaterThread = new FailedOpenUpdaterThread(masterServices.getConfiguration());
@@ -356,9 +358,112 @@ private List<RSGroupInfo> retrieveGroupListFromZookeeper() throws IOException {
356358
return RSGroupInfoList;
357359
}
358360

359-
@Override
360-
public void refresh() throws IOException {
361-
refresh(false);
361+
private void waitUntilSomeProcsDone(Set<Long> pendingProcIds) {
362+
int size = pendingProcIds.size();
363+
while (!masterServices.isStopped()) {
364+
for (Iterator<Long> iter = pendingProcIds.iterator(); iter.hasNext();) {
365+
long procId = iter.next();
366+
if (masterServices.getMasterProcedureExecutor().isFinished(procId)) {
367+
iter.remove();
368+
}
369+
}
370+
if (pendingProcIds.size() < size) {
371+
return;
372+
}
373+
try {
374+
Thread.sleep(1000);
375+
} catch (InterruptedException e) {
376+
Thread.currentThread().interrupt();
377+
}
378+
}
379+
}
380+
381+
private void waitUntilMasterStarted() {
382+
while (!masterServices.isInitialized() && !masterServices.isStopped()) {
383+
try {
384+
Thread.sleep(1000);
385+
} catch (InterruptedException e) {
386+
Thread.currentThread().interrupt();
387+
}
388+
}
389+
}
390+
391+
private void migrate(List<RSGroupInfo> groupList, int maxConcurrency) {
392+
LOG.info("Start migrating table rs group config");
393+
waitUntilMasterStarted();
394+
Set<Long> pendingProcIds = new HashSet<>();
395+
for (RSGroupInfo groupInfo : groupList) {
396+
if (groupInfo.getName().equals(RSGroupInfo.DEFAULT_GROUP)) {
397+
continue;
398+
}
399+
SortedSet<TableName> failedTables = new TreeSet<>();
400+
for (TableName tableName : groupInfo.getTables()) {
401+
LOG.info("Migrating {} in group {}", tableName, groupInfo.getName());
402+
TableDescriptor oldTd;
403+
try {
404+
oldTd = masterServices.getTableDescriptors().get(tableName);
405+
} catch (IOException e) {
406+
LOG.warn("Failed to migrate {} in group {}", tableName, groupInfo.getName(), e);
407+
failedTables.add(tableName);
408+
continue;
409+
}
410+
if (oldTd == null) {
411+
continue;
412+
}
413+
if (oldTd.getRegionServerGroup().isPresent()) {
414+
// either we have already migrated it or that user has set the rs group with the new
415+
// code, skip.
416+
LOG.debug("Skip migrating {} since it is already in group {}", tableName,
417+
oldTd.getRegionServerGroup().get());
418+
continue;
419+
}
420+
TableDescriptor newTd = TableDescriptorBuilder.newBuilder(oldTd)
421+
.setRegionServerGroup(groupInfo.getName()).build();
422+
try {
423+
pendingProcIds.add(
424+
masterServices.modifyTable(tableName, newTd, HConstants.NO_NONCE, HConstants.NO_NONCE));
425+
} catch (IOException e) {
426+
LOG.warn("Failed to migrate {} in group {}", tableName, groupInfo.getName(), e);
427+
failedTables.add(tableName);
428+
continue;
429+
}
430+
if (pendingProcIds.size() >= maxConcurrency) {
431+
waitUntilSomeProcsDone(pendingProcIds);
432+
}
433+
}
434+
LOG.info("Done migrating {}, failed tables {}", groupInfo.getName(), failedTables);
435+
synchronized (RSGroupInfoManagerImpl.this) {
436+
RSGroupInfo currentInfo = rsGroupMap.get(groupInfo.getName());
437+
if (currentInfo != null) {
438+
RSGroupInfo newInfo =
439+
new RSGroupInfo(currentInfo.getName(), currentInfo.getServers(), failedTables);
440+
Map<String, RSGroupInfo> newGroupMap = new HashMap<>(rsGroupMap);
441+
newGroupMap.put(groupInfo.getName(), newInfo);
442+
try {
443+
flushConfig(newGroupMap);
444+
} catch (IOException e) {
445+
LOG.warn("Failed to persist rs group", e);
446+
}
447+
}
448+
}
449+
}
450+
LOG.info("Done migrating table rs group info");
451+
}
452+
453+
// Migrate the table rs group info from RSGroupInfo into the table descriptor
454+
// Note that we do not want to block initialization, so this will be done in the background, and
455+
// during the migration, the rs group info may be incomplete and cause regions to be misplaced.
456+
private void migrate(List<RSGroupInfo> groupList) {
457+
final int maxConcurrency = 8;
458+
Thread migrateThread = new Thread("Migrate-RSGroup") {
459+
460+
@Override
461+
public void run() {
462+
migrate(groupList, maxConcurrency);
463+
}
464+
};
465+
migrateThread.setDaemon(true);
466+
migrateThread.start();
362467
}
363468

364469
/**
@@ -389,6 +494,7 @@ private synchronized void refresh(boolean forceOnline) throws IOException {
389494
}
390495
resetRSGroupMap(newGroupMap);
391496
updateCacheOfRSGroups(rsGroupMap.keySet());
497+
migrate(groupList);
392498
}
393499

394500
private void flushConfigTable(Map<String, RSGroupInfo> groupMap) throws IOException {
@@ -403,9 +509,9 @@ private void flushConfigTable(Map<String, RSGroupInfo> groupMap) throws IOExcept
403509
}
404510

405511
// populate puts
406-
for (RSGroupInfo RSGroupInfo : groupMap.values()) {
407-
RSGroupProtos.RSGroupInfo proto = ProtobufUtil.toProtoGroupInfo(RSGroupInfo);
408-
Put p = new Put(Bytes.toBytes(RSGroupInfo.getName()));
512+
for (RSGroupInfo rsGroupInfo : groupMap.values()) {
513+
RSGroupProtos.RSGroupInfo proto = ProtobufUtil.toProtoGroupInfo(rsGroupInfo);
514+
Put p = new Put(Bytes.toBytes(rsGroupInfo.getName()));
409515
p.addColumn(META_FAMILY_BYTES, META_QUALIFIER_BYTES, proto.toByteArray());
410516
mutations.add(p);
411517
}
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
/**
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package org.apache.hadoop.hbase.rsgroup;
19+
20+
import static org.apache.hadoop.hbase.rsgroup.RSGroupInfoManagerImpl.*;
21+
import static org.junit.Assert.assertTrue;
22+
23+
import java.io.IOException;
24+
import org.apache.hadoop.hbase.HBaseClassTestRule;
25+
import org.apache.hadoop.hbase.TableName;
26+
import org.apache.hadoop.hbase.client.Put;
27+
import org.apache.hadoop.hbase.client.Table;
28+
import org.apache.hadoop.hbase.client.TableDescriptor;
29+
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
30+
import org.apache.hadoop.hbase.protobuf.generated.RSGroupProtos;
31+
import org.apache.hadoop.hbase.testclassification.MediumTests;
32+
import org.apache.hadoop.hbase.util.Bytes;
33+
import org.junit.AfterClass;
34+
import org.junit.BeforeClass;
35+
import org.junit.ClassRule;
36+
import org.junit.Test;
37+
import org.junit.experimental.categories.Category;
38+
39+
/**
40+
* Testcase for HBASE-22819
41+
*/
42+
@Category({ MediumTests.class })
43+
public class TestMigrateRSGroupInfo extends TestRSGroupsBase {
44+
45+
@ClassRule
46+
public static final HBaseClassTestRule CLASS_RULE =
47+
HBaseClassTestRule.forClass(TestMigrateRSGroupInfo.class);
48+
49+
private static String TABLE_NAME_PREFIX = "Table_";
50+
51+
private static int NUM_TABLES = 10;
52+
53+
private static byte[] FAMILY = Bytes.toBytes("family");
54+
55+
@BeforeClass
56+
public static void setUp() throws Exception {
57+
setUpTestBeforeClass();
58+
for (int i = 0; i < NUM_TABLES; i++) {
59+
TEST_UTIL.createTable(TableName.valueOf(TABLE_NAME_PREFIX + i), FAMILY);
60+
}
61+
}
62+
63+
@AfterClass
64+
public static void tearDown() throws Exception {
65+
tearDownAfterClass();
66+
}
67+
68+
@Test
69+
public void testMigrate() throws IOException, InterruptedException {
70+
String groupName = name.getMethodName();
71+
addGroup(groupName, TEST_UTIL.getMiniHBaseCluster().getRegionServerThreads().size() - 1);
72+
RSGroupInfo rsGroupInfo = rsGroupAdmin.getRSGroupInfo(groupName);
73+
assertTrue(rsGroupInfo.getTables().isEmpty());
74+
for (int i = 0; i < NUM_TABLES; i++) {
75+
rsGroupInfo.addTable(TableName.valueOf(TABLE_NAME_PREFIX + i));
76+
}
77+
try (Table table = TEST_UTIL.getConnection().getTable(RSGROUP_TABLE_NAME)) {
78+
RSGroupProtos.RSGroupInfo proto = ProtobufUtil.toProtoGroupInfo(rsGroupInfo);
79+
Put p = new Put(Bytes.toBytes(rsGroupInfo.getName()));
80+
p.addColumn(META_FAMILY_BYTES, META_QUALIFIER_BYTES, proto.toByteArray());
81+
table.put(p);
82+
}
83+
TEST_UTIL.getMiniHBaseCluster().stopMaster(0).join();
84+
TEST_UTIL.getMiniHBaseCluster().startMaster();
85+
TEST_UTIL.waitFor(60000, () -> {
86+
for (int i = 0; i < NUM_TABLES; i++) {
87+
TableDescriptor td;
88+
try {
89+
td = TEST_UTIL.getAdmin().getDescriptor(TableName.valueOf(TABLE_NAME_PREFIX + i));
90+
} catch (IOException e) {
91+
return false;
92+
}
93+
if (!rsGroupInfo.getName().equals(td.getRegionServerGroup().orElse(null))) {
94+
return false;
95+
}
96+
}
97+
return true;
98+
});
99+
}
100+
}

0 commit comments

Comments
 (0)