Skip to content

Commit ee6897b

Browse files
Caroline Zhousaintstack
authored andcommitted
HBASE-23172 HBase Canary region success count metrics reflect column family successes, not region successes
1 parent 11bf114 commit ee6897b

File tree

2 files changed

+52
-29
lines changed

2 files changed

+52
-29
lines changed

hbase-server/src/main/java/org/apache/hadoop/hbase/tool/CanaryTool.java

Lines changed: 28 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -273,7 +273,7 @@ public void publishReadTiming(String znode, String server, long msTime) {
273273
public static class RegionStdOutSink extends StdOutSink {
274274
private Map<String, LongAdder> perTableReadLatency = new HashMap<>();
275275
private LongAdder writeLatency = new LongAdder();
276-
private final Map<String, RegionTaskResult> regionMap = new ConcurrentHashMap<>();
276+
private final Map<String, List<RegionTaskResult>> regionMap = new ConcurrentHashMap<>();
277277

278278
public void publishReadFailure(ServerName serverName, RegionInfo region, Exception e) {
279279
incReadFailureCount();
@@ -289,10 +289,13 @@ public void publishReadFailure(ServerName serverName, RegionInfo region,
289289

290290
public void publishReadTiming(ServerName serverName, RegionInfo region,
291291
ColumnFamilyDescriptor column, long msTime) {
292+
RegionTaskResult rtr = new RegionTaskResult(region, region.getTable(), serverName, column);
293+
rtr.setReadSuccess();
294+
rtr.setReadLatency(msTime);
295+
List<RegionTaskResult> rtrs = regionMap.get(region.getRegionNameAsString());
296+
rtrs.add(rtr);
297+
// Note that read success count will be equal to total column family read successes.
292298
incReadSuccessCount();
293-
RegionTaskResult res = this.regionMap.get(region.getRegionNameAsString());
294-
res.setReadSuccess();
295-
res.setReadLatency(msTime);
296299
LOG.info("Read from {} on {} {} in {}ms", region.getRegionNameAsString(), serverName,
297300
column.getNameAsString(), msTime);
298301
}
@@ -311,10 +314,13 @@ public void publishWriteFailure(ServerName serverName, RegionInfo region,
311314

312315
public void publishWriteTiming(ServerName serverName, RegionInfo region,
313316
ColumnFamilyDescriptor column, long msTime) {
317+
RegionTaskResult rtr = new RegionTaskResult(region, region.getTable(), serverName, column);
318+
rtr.setWriteSuccess();
319+
rtr.setWriteLatency(msTime);
320+
List<RegionTaskResult> rtrs = regionMap.get(region.getRegionNameAsString());
321+
rtrs.add(rtr);
322+
// Note that write success count will be equal to total column family write successes.
314323
incWriteSuccessCount();
315-
RegionTaskResult res = this.regionMap.get(region.getRegionNameAsString());
316-
res.setWriteSuccess();
317-
res.setWriteLatency(msTime);
318324
LOG.info("Write to {} on {} {} in {}ms",
319325
region.getRegionNameAsString(), serverName, column.getNameAsString(), msTime);
320326
}
@@ -337,7 +343,7 @@ public LongAdder getWriteLatency() {
337343
return this.writeLatency;
338344
}
339345

340-
public Map<String, RegionTaskResult> getRegionMap() {
346+
public Map<String, List<RegionTaskResult>> getRegionMap() {
341347
return this.regionMap;
342348
}
343349

@@ -1046,15 +1052,18 @@ public static class RegionTaskResult {
10461052
private RegionInfo region;
10471053
private TableName tableName;
10481054
private ServerName serverName;
1055+
private ColumnFamilyDescriptor column;
10491056
private AtomicLong readLatency = null;
10501057
private AtomicLong writeLatency = null;
10511058
private boolean readSuccess = false;
10521059
private boolean writeSuccess = false;
10531060

1054-
public RegionTaskResult(RegionInfo region, TableName tableName, ServerName serverName) {
1061+
public RegionTaskResult(RegionInfo region, TableName tableName, ServerName serverName,
1062+
ColumnFamilyDescriptor column) {
10551063
this.region = region;
10561064
this.tableName = tableName;
10571065
this.serverName = serverName;
1066+
this.column = column;
10581067
}
10591068

10601069
public RegionInfo getRegionInfo() {
@@ -1081,6 +1090,14 @@ public String getServerNameAsString() {
10811090
return this.serverName.getServerName();
10821091
}
10831092

1093+
public ColumnFamilyDescriptor getColumnFamily() {
1094+
return this.column;
1095+
}
1096+
1097+
public String getColumnFamilyNameAsString() {
1098+
return this.column.getNameAsString();
1099+
}
1100+
10841101
public long getReadLatency() {
10851102
if (this.readLatency == null) {
10861103
return -1;
@@ -1566,9 +1583,8 @@ private static List<Future<Void>> sniff(final Admin admin, final Sink sink,
15661583
RegionInfo region = location.getRegion();
15671584
tasks.add(new RegionTask(admin.getConnection(), region, rs, (RegionStdOutSink)sink,
15681585
taskType, rawScanEnabled, rwLatency));
1569-
Map<String, RegionTaskResult> regionMap = ((RegionStdOutSink) sink).getRegionMap();
1570-
regionMap.put(region.getRegionNameAsString(), new RegionTaskResult(region,
1571-
region.getTable(), rs));
1586+
Map<String, List<RegionTaskResult>> regionMap = ((RegionStdOutSink) sink).getRegionMap();
1587+
regionMap.put(region.getRegionNameAsString(), new ArrayList<RegionTaskResult>());
15721588
}
15731589
return executor.invokeAll(tasks);
15741590
}

hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestCanaryTool.java

Lines changed: 24 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
import static org.mockito.Mockito.times;
3333
import static org.mockito.Mockito.verify;
3434

35+
import java.util.List;
3536
import java.util.Map;
3637
import java.util.concurrent.ExecutorService;
3738
import java.util.concurrent.ScheduledThreadPoolExecutor;
@@ -146,34 +147,40 @@ public void testCanaryRegionTaskResult() throws Exception {
146147
String[] args = { "-writeSniffing", "-t", "10000", "testCanaryRegionTaskResult" };
147148
assertEquals(0, ToolRunner.run(testingUtility.getConfiguration(), canary, args));
148149

150+
assertTrue("canary should expect to scan at least 1 region",
151+
sink.getTotalExpectedRegions() > 0);
152+
assertTrue("there should be no read failures", sink.getReadFailureCount() == 0);
153+
assertTrue("there should be no write failures", sink.getWriteFailureCount() == 0);
149154
assertTrue("verify read success count > 0", sink.getReadSuccessCount() > 0);
150155
assertTrue("verify write success count > 0", sink.getWriteSuccessCount() > 0);
151156
verify(sink, atLeastOnce()).publishReadTiming(isA(ServerName.class), isA(RegionInfo.class),
152157
isA(ColumnFamilyDescriptor.class), anyLong());
153158
verify(sink, atLeastOnce()).publishWriteTiming(isA(ServerName.class), isA(RegionInfo.class),
154159
isA(ColumnFamilyDescriptor.class), anyLong());
155160

156-
assertTrue("canary should expect to scan at least 1 region",
157-
sink.getTotalExpectedRegions() > 0);
158-
Map<String, CanaryTool.RegionTaskResult> regionMap = sink.getRegionMap();
161+
assertEquals("canary region success count should equal total expected regions",
162+
sink.getReadSuccessCount() + sink.getWriteSuccessCount(), sink.getTotalExpectedRegions());
163+
Map<String, List<CanaryTool.RegionTaskResult>> regionMap = sink.getRegionMap();
159164
assertFalse("verify region map has size > 0", regionMap.isEmpty());
160165

161166
for (String regionName : regionMap.keySet()) {
162-
CanaryTool.RegionTaskResult res = regionMap.get(regionName);
163-
assertNotNull("verify each expected region has a RegionTaskResult object in the map", res);
164-
assertNotNull("verify getRegionNameAsString()", regionName);
165-
assertNotNull("verify getRegionInfo()", res.getRegionInfo());
166-
assertNotNull("verify getTableName()", res.getTableName());
167-
assertNotNull("verify getTableNameAsString()", res.getTableNameAsString());
168-
assertNotNull("verify getServerName()", res.getServerName());
169-
assertNotNull("verify getServerNameAsString()", res.getServerNameAsString());
167+
for (CanaryTool.RegionTaskResult res: regionMap.get(regionName)) {
168+
assertNotNull("verify getRegionNameAsString()", regionName);
169+
assertNotNull("verify getRegionInfo()", res.getRegionInfo());
170+
assertNotNull("verify getTableName()", res.getTableName());
171+
assertNotNull("verify getTableNameAsString()", res.getTableNameAsString());
172+
assertNotNull("verify getServerName()", res.getServerName());
173+
assertNotNull("verify getServerNameAsString()", res.getServerNameAsString());
174+
assertNotNull("verify getColumnFamily()", res.getColumnFamily());
175+
assertNotNull("verify getColumnFamilyNameAsString()", res.getColumnFamilyNameAsString());
170176

171-
if (regionName.contains(CanaryTool.DEFAULT_WRITE_TABLE_NAME.getNameAsString())) {
172-
assertTrue("write to region " + regionName + " succeeded", res.isWriteSuccess());
173-
assertTrue("write took some time", res.getWriteLatency() > -1);
174-
} else {
175-
assertTrue("read from region " + regionName + " succeeded", res.isReadSuccess());
176-
assertTrue("read took some time", res.getReadLatency() > -1);
177+
if (regionName.contains(CanaryTool.DEFAULT_WRITE_TABLE_NAME.getNameAsString())) {
178+
assertTrue("write to region " + regionName + " succeeded", res.isWriteSuccess());
179+
assertTrue("write took some time", res.getWriteLatency() > -1);
180+
} else {
181+
assertTrue("read from region " + regionName + " succeeded", res.isReadSuccess());
182+
assertTrue("read took some time", res.getReadLatency() > -1);
183+
}
177184
}
178185
}
179186
}

0 commit comments

Comments
 (0)