Commit 8452ef8

make use of new hdfs user configuration in namenode and datanode actions

1 parent d835d76

3 files changed: +45 -41 lines changed

This change switches the namenode and datanode chaos actions from looking up the root-dir FileSystem via CommonFSUtils to opening a DistributedFileSystem through a new HdfsActionUtils.createDfs helper (which connects as a configurable HDFS user), and wires RestartActiveNameNodeAction into the server-and-dependencies chaos monkey.

hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartActiveNameNodeAction.java

Lines changed: 36 additions & 28 deletions
@@ -20,12 +20,12 @@
 import java.util.List;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.ServerName;
-import org.apache.hadoop.hbase.util.CommonFSUtils;
 import org.apache.hadoop.hbase.zookeeper.RecoverableZooKeeper;
 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
 import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
 import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
 import org.apache.hadoop.hdfs.DFSUtil;
+import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.HAUtil;
 import org.apache.hadoop.hdfs.server.namenode.ha.proto.HAZKInfoProtos.ActiveNodeInfo;
 import org.slf4j.Logger;
@@ -57,39 +57,47 @@ protected Logger getLogger() {
   @Override
   public void perform() throws Exception {
     getLogger().info("Performing action: Restart active namenode");
-    Configuration conf = CommonFSUtils.getRootDir(getConf()).getFileSystem(getConf()).getConf();
-    String nameServiceID = DFSUtil.getNamenodeNameServiceId(conf);
-    if (!HAUtil.isHAEnabled(conf, nameServiceID)) {
-      throw new Exception("HA for namenode is not enabled");
-    }
-    ZKWatcher zkw = null;
-    RecoverableZooKeeper rzk = null;
+
+    final String hadoopHAZkNode;
     String activeNamenode = null;
-    String hadoopHAZkNode = conf.get(ZK_PARENT_ZNODE_KEY, ZK_PARENT_ZNODE_DEFAULT);
-    try {
-      zkw = new ZKWatcher(conf, "get-active-namenode", null);
-      rzk = zkw.getRecoverableZooKeeper();
-      String hadoopHAZkNodePath = ZNodePaths.joinZNode(hadoopHAZkNode, nameServiceID);
-      List<String> subChildern = ZKUtil.listChildrenNoWatch(zkw, hadoopHAZkNodePath);
-      for (String eachEntry : subChildern) {
-        if (eachEntry.contains(ACTIVE_NN_LOCK_NAME)) {
-          byte[] data =
-            rzk.getData(ZNodePaths.joinZNode(hadoopHAZkNodePath, ACTIVE_NN_LOCK_NAME), false, null);
-          ActiveNodeInfo proto = ActiveNodeInfo.parseFrom(data);
-          activeNamenode = proto.getHostname();
-        }
+    int activeNamenodePort = -1;
+    try (final DistributedFileSystem dfs = HdfsActionUtils.createDfs(getConf())) {
+      final Configuration conf = dfs.getConf();
+      hadoopHAZkNode = conf.get(ZK_PARENT_ZNODE_KEY, ZK_PARENT_ZNODE_DEFAULT);
+      final String nameServiceID = DFSUtil.getNamenodeNameServiceId(conf);
+
+      if (!HAUtil.isHAEnabled(conf, nameServiceID)) {
+        getLogger().info("HA for HDFS is not enabled; skipping");
+        return;
       }
-    } finally {
-      if (zkw != null) {
-        zkw.close();
+      try (final ZKWatcher zkw = new ZKWatcher(conf, "get-active-namenode", null)) {
+        final RecoverableZooKeeper rzk = zkw.getRecoverableZooKeeper();
+        // If hadoopHAZkNode == '/', pass '' instead because then joinZNode will return '//' as a
+        // prefix, which zk doesn't like as a prefix on the path.
+        final String hadoopHAZkNodePath = ZNodePaths.joinZNode(
+          (hadoopHAZkNode != null && hadoopHAZkNode.equals("/")) ? "" : hadoopHAZkNode,
+          nameServiceID);
+        final List<String> subChildren = ZKUtil.listChildrenNoWatch(zkw, hadoopHAZkNodePath);
+        for (final String eachEntry : subChildren) {
+          if (eachEntry.contains(ACTIVE_NN_LOCK_NAME)) {
+            byte[] data = rzk.getData(ZNodePaths.joinZNode(hadoopHAZkNodePath, ACTIVE_NN_LOCK_NAME),
+              false, null);
+            ActiveNodeInfo proto = ActiveNodeInfo.parseFrom(data);
+            activeNamenode = proto.getHostname();
+            activeNamenodePort = proto.getPort();
+          }
+        }
       }
     }
+
     if (activeNamenode == null) {
       throw new Exception("No active Name node found in zookeeper under " + hadoopHAZkNode);
+    } else {
+      getLogger().info("Found Active NameNode host: {}", activeNamenode);
+      final ServerName activeNNHost = ServerName.valueOf(activeNamenode, activeNamenodePort, -1L);
+      getLogger().info("Restarting Active NameNode: {}", activeNamenode);
+      restartNameNode(activeNNHost, this.sleepTime);
     }
-    getLogger().info("Found active namenode host:" + activeNamenode);
-    ServerName activeNNHost = ServerName.valueOf(activeNamenode, -1, -1);
-    getLogger().info("Restarting Active NameNode :" + activeNamenode);
-    restartNameNode(activeNNHost, sleepTime);
   }
 }
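The rewritten action obtains its DistributedFileSystem from HdfsActionUtils.createDfs, the helper the commit message refers to but which is not shown in this diff. A minimal sketch of the idea it appears to encapsulate, assuming a hypothetical configuration key name and the standard UserGroupInformation/FileSystem APIs; the committed helper's key, default, and error handling may differ:

import java.io.IOException;
import java.security.PrivilegedExceptionAction;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.security.UserGroupInformation;

final class HdfsActionUtilsSketch {
  // Hypothetical key; the committed helper may use a different name and default.
  static final String HDFS_USER_CONF_KEY = "org.apache.hadoop.hbase.chaos.hdfs.user";

  static DistributedFileSystem createDfs(final Configuration conf)
    throws IOException, InterruptedException {
    // Connect as the configured HDFS user rather than the process user, so the
    // chaos actions can inspect and restart HDFS daemons on secured clusters.
    final String user = conf.get(HDFS_USER_CONF_KEY, "hdfs");
    final UserGroupInformation ugi = UserGroupInformation.createRemoteUser(user);
    return ugi.doAs((PrivilegedExceptionAction<DistributedFileSystem>) () -> {
      final Path rootDir = CommonFSUtils.getRootDir(conf);
      // newInstance (unlike FileSystem.get) returns an uncached instance, which
      // is why the call sites can safely close it in try-with-resources.
      return (DistributedFileSystem) FileSystem.newInstance(rootDir.toUri(), conf);
    });
  }
}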

hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartRandomDataNodeAction.java

Lines changed: 7 additions & 13 deletions
@@ -18,14 +18,11 @@
 package org.apache.hadoop.hbase.chaos.actions;
 
 import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
+import java.util.Arrays;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
-import org.apache.hadoop.hbase.util.CommonFSUtils;
 import org.apache.hadoop.hdfs.DFSClient;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
-import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -48,18 +45,15 @@ protected Logger getLogger() {
   @Override
   public void perform() throws Exception {
     getLogger().info("Performing action: Restart random data node");
-    ServerName server = PolicyBasedChaosMonkey.selectRandomItem(getDataNodes());
+    final ServerName server = PolicyBasedChaosMonkey.selectRandomItem(getDataNodes());
     restartDataNode(server, sleepTime);
   }
 
-  public ServerName[] getDataNodes() throws IOException {
-    DistributedFileSystem fs =
-      (DistributedFileSystem) CommonFSUtils.getRootDir(getConf()).getFileSystem(getConf());
-    DFSClient dfsClient = fs.getClient();
-    List<ServerName> hosts = new ArrayList<>();
-    for (DatanodeInfo dataNode : dfsClient.datanodeReport(HdfsConstants.DatanodeReportType.LIVE)) {
-      hosts.add(ServerName.valueOf(dataNode.getHostName(), -1, -1));
+  private ServerName[] getDataNodes() throws IOException {
+    try (final DistributedFileSystem dfs = HdfsActionUtils.createDfs(getConf())) {
+      final DFSClient dfsClient = dfs.getClient();
+      return Arrays.stream(dfsClient.datanodeReport(HdfsConstants.DatanodeReportType.LIVE))
+        .map(dn -> ServerName.valueOf(dn.getHostName(), -1, -1)).toArray(ServerName[]::new);
     }
-    return hosts.toArray(new ServerName[0]);
   }
 }
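Note that the rewritten getDataNodes() opens a fresh filesystem per call and closes it when done, rather than borrowing the process-wide cached instance the old CommonFSUtils path returned (closing that one would have broken every other user in the JVM). A small standalone usage sketch of the same report-and-map pattern, assuming the HdfsActionUtilsSketch helper shown earlier:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;

public final class ListLiveDataNodes {
  public static void main(final String[] args) throws Exception {
    final Configuration conf = HBaseConfiguration.create();
    try (final DistributedFileSystem dfs = HdfsActionUtilsSketch.createDfs(conf)) {
      // datanodeReport asks the active NameNode for all live DataNodes.
      for (final DatanodeInfo dn : dfs.getClient()
        .datanodeReport(HdfsConstants.DatanodeReportType.LIVE)) {
        System.out.println(dn.getHostName() + ":" + dn.getXferPort());
      }
    }
  }
}

The action itself keeps port -1 in the ServerName it builds, since restarting a DataNode only needs the host.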

hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerAndDependenciesKillingMonkeyFactory.java

Lines changed: 2 additions & 0 deletions
@@ -23,6 +23,7 @@
 import org.apache.hadoop.hbase.chaos.actions.ForceBalancerAction;
 import org.apache.hadoop.hbase.chaos.actions.GracefulRollingRestartRsAction;
 import org.apache.hadoop.hbase.chaos.actions.RestartActiveMasterAction;
+import org.apache.hadoop.hbase.chaos.actions.RestartActiveNameNodeAction;
 import org.apache.hadoop.hbase.chaos.actions.RestartRandomDataNodeAction;
 import org.apache.hadoop.hbase.chaos.actions.RestartRandomRsExceptMetaAction;
 import org.apache.hadoop.hbase.chaos.actions.RestartRandomZKNodeAction;
@@ -56,6 +57,7 @@ public ChaosMonkey build() {
       // only allow 2 servers to be dead.
       new RollingBatchRestartRsAction(5000, 1.0f, 2, true),
       new ForceBalancerAction(),
+      new RestartActiveNameNodeAction(60000),
       new RestartRandomDataNodeAction(60000),
       new RestartRandomZKNodeAction(60000),
       new GracefulRollingRestartRsAction(gracefulRollingRestartTSSLeepTime),
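For context, the 60000 passed to RestartActiveNameNodeAction is a sleepTime in milliseconds. The restart helpers these actions inherit from RestartActionBaseAction follow a kill/sleep/start pattern; a sketch of that shape, with the exact body assumed rather than copied from this commit:

// Sketch of the inherited restart helper (RestartActionBaseAction in the same
// package); method names and logging are assumptions, not part of this diff.
void restartNameNode(final ServerName server, final long sleepTime) throws IOException {
  final long sleep = Math.max(sleepTime, 1000);
  getLogger().info("Killing namenode: {}", server.getHostname());
  killNameNode(server);  // ask the ClusterManager to kill the NN process
  sleep(sleep);          // window during which the standby NN takes over
  getLogger().info("Starting namenode: {}", server.getHostname());
  startNameNode(server); // bring the old active back; it rejoins as standby
}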
