Skip to content

Commit 36bb0d1

Browse files
authored
HBASE-15242: add client side metrics for timeout and remote exceptions. (#5023)
Signed-off-by: Andrew Purtell <[email protected]>
1 parent 22dbb7a commit 36bb0d1

File tree

3 files changed

+58
-16
lines changed

3 files changed

+58
-16
lines changed

hbase-client/src/main/java/org/apache/hadoop/hbase/client/MetricsConnection.java

Lines changed: 17 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,7 @@
3737
import org.apache.hadoop.conf.Configuration;
3838
import org.apache.hadoop.hbase.ServerName;
3939
import org.apache.hadoop.hbase.util.Bytes;
40+
import org.apache.hadoop.ipc.RemoteException;
4041
import org.apache.yetus.audience.InterfaceAudience;
4142

4243
import org.apache.hbase.thirdparty.com.google.protobuf.Descriptors.MethodDescriptor;
@@ -118,6 +119,9 @@ static String getScope(Configuration conf, String clusterId, Object connectionOb
118119

119120
private static final String CNT_BASE = "rpcCount_";
120121
private static final String FAILURE_CNT_BASE = "rpcFailureCount_";
122+
private static final String TOTAL_EXCEPTION_CNT = "rpcTotalExceptions";
123+
private static final String LOCAL_EXCEPTION_CNT_BASE = "rpcLocalExceptions_";
124+
private static final String REMOTE_EXCEPTION_CNT_BASE = "rpcRemoteExceptions_";
121125
private static final String DRTN_BASE = "rpcCallDurationMs_";
122126
private static final String REQ_BASE = "rpcCallRequestSizeBytes_";
123127
private static final String RESP_BASE = "rpcCallResponseSizeBytes_";
@@ -638,16 +642,27 @@ private void shutdown() {
638642
}
639643

640644
/** Report RPC context to metrics system. */
641-
public void updateRpc(MethodDescriptor method, Message param, CallStats stats, boolean failed) {
645+
public void updateRpc(MethodDescriptor method, Message param, CallStats stats, Throwable e) {
642646
int callsPerServer = stats.getConcurrentCallsPerServer();
643647
if (callsPerServer > 0) {
644648
concurrentCallsPerServerHist.update(callsPerServer);
645649
}
646650
// Update the counter that tracks RPCs by type.
647651
final String methodName = method.getService().getName() + "_" + method.getName();
648652
getMetric(CNT_BASE + methodName, rpcCounters, counterFactory).inc();
649-
if (failed) {
653+
if (e != null) {
650654
getMetric(FAILURE_CNT_BASE + methodName, rpcCounters, counterFactory).inc();
655+
getMetric(TOTAL_EXCEPTION_CNT, rpcCounters, counterFactory).inc();
656+
if (e instanceof RemoteException) {
657+
String fullClassName = ((RemoteException) e).getClassName();
658+
String simpleClassName = (fullClassName != null)
659+
? fullClassName.substring(fullClassName.lastIndexOf(".") + 1)
660+
: "unknown";
661+
getMetric(REMOTE_EXCEPTION_CNT_BASE + simpleClassName, rpcCounters, counterFactory).inc();
662+
} else {
663+
getMetric(LOCAL_EXCEPTION_CNT_BASE + e.getClass().getSimpleName(), rpcCounters,
664+
counterFactory).inc();
665+
}
651666
}
652667
// this implementation is tied directly to protobuf implementation details. would be better
653668
// if we could dispatch based on something static, ie, request Message type.

hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/AbstractRpcClient.java

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -375,16 +375,15 @@ private T getConnection(ConnectionId remoteId) throws IOException {
375375
private void onCallFinished(Call call, HBaseRpcController hrc, Address addr,
376376
RpcCallback<Message> callback) {
377377
call.callStats.setCallTimeMs(EnvironmentEdgeManager.currentTime() - call.getStartTime());
378-
final boolean failed = (call.error != null) ? true : false;
379378
if (metrics != null) {
380-
metrics.updateRpc(call.md, call.param, call.callStats, failed);
379+
metrics.updateRpc(call.md, call.param, call.callStats, call.error);
381380
}
382381
if (LOG.isTraceEnabled()) {
383382
LOG.trace("CallId: {}, call: {}, startTime: {}ms, callTime: {}ms, status: {}", call.id,
384383
call.md.getName(), call.getStartTime(), call.callStats.getCallTimeMs(),
385-
failed ? "failed" : "successful");
384+
call.error != null ? "failed" : "successful");
386385
}
387-
if (failed) {
386+
if (call.error != null) {
388387
if (call.error instanceof RemoteException) {
389388
call.error.fillInStackTrace();
390389
hrc.setFailed(call.error);

hbase-client/src/test/java/org/apache/hadoop/hbase/client/TestMetricsConnection.java

Lines changed: 38 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,8 @@
3131
import java.util.concurrent.ThreadPoolExecutor;
3232
import org.apache.hadoop.conf.Configuration;
3333
import org.apache.hadoop.hbase.HBaseClassTestRule;
34+
import org.apache.hadoop.hbase.ipc.CallTimeoutException;
35+
import org.apache.hadoop.hbase.ipc.RemoteWithExtrasException;
3436
import org.apache.hadoop.hbase.security.User;
3537
import org.apache.hadoop.hbase.testclassification.ClientTests;
3638
import org.apache.hadoop.hbase.testclassification.MetricsTests;
@@ -150,51 +152,77 @@ public void testStaticMetrics() throws IOException {
150152

151153
for (int i = 0; i < loop; i++) {
152154
METRICS.updateRpc(ClientService.getDescriptor().findMethodByName("Get"),
153-
GetRequest.getDefaultInstance(), MetricsConnection.newCallStats(), false);
155+
GetRequest.getDefaultInstance(), MetricsConnection.newCallStats(), null);
154156
METRICS.updateRpc(ClientService.getDescriptor().findMethodByName("Scan"),
155-
ScanRequest.getDefaultInstance(), MetricsConnection.newCallStats(), false);
157+
ScanRequest.getDefaultInstance(), MetricsConnection.newCallStats(),
158+
new RemoteWithExtrasException("java.io.IOException", null, false, false));
156159
METRICS.updateRpc(ClientService.getDescriptor().findMethodByName("Multi"),
157-
MultiRequest.getDefaultInstance(), MetricsConnection.newCallStats(), true);
160+
MultiRequest.getDefaultInstance(), MetricsConnection.newCallStats(),
161+
new CallTimeoutException("test with CallTimeoutException"));
158162
METRICS.updateRpc(ClientService.getDescriptor().findMethodByName("Mutate"),
159163
MutateRequest.newBuilder()
160164
.setMutation(ProtobufUtil.toMutation(MutationType.APPEND, new Append(foo)))
161165
.setRegion(region).build(),
162-
MetricsConnection.newCallStats(), false);
166+
MetricsConnection.newCallStats(), null);
163167
METRICS.updateRpc(ClientService.getDescriptor().findMethodByName("Mutate"),
164168
MutateRequest.newBuilder()
165169
.setMutation(ProtobufUtil.toMutation(MutationType.DELETE, new Delete(foo)))
166170
.setRegion(region).build(),
167-
MetricsConnection.newCallStats(), false);
171+
MetricsConnection.newCallStats(), null);
168172
METRICS.updateRpc(ClientService.getDescriptor().findMethodByName("Mutate"),
169173
MutateRequest.newBuilder()
170174
.setMutation(ProtobufUtil.toMutation(MutationType.INCREMENT, new Increment(foo)))
171175
.setRegion(region).build(),
172-
MetricsConnection.newCallStats(), false);
176+
MetricsConnection.newCallStats(), null);
173177
METRICS.updateRpc(ClientService.getDescriptor().findMethodByName("Mutate"),
174178
MutateRequest.newBuilder()
175179
.setMutation(ProtobufUtil.toMutation(MutationType.PUT, new Put(foo))).setRegion(region)
176180
.build(),
177-
MetricsConnection.newCallStats(), false);
181+
MetricsConnection.newCallStats(), null);
178182
}
183+
179184
final String rpcCountPrefix = "rpcCount_" + ClientService.getDescriptor().getName() + "_";
180185
final String rpcFailureCountPrefix =
181186
"rpcFailureCount_" + ClientService.getDescriptor().getName() + "_";
182187
String metricKey;
183188
long metricVal;
184189
Counter counter;
185-
for (String method : new String[] { "Get", "Scan", "Mutate" }) {
190+
191+
for (String method : new String[] { "Get", "Scan", "Multi", "Mutate" }) {
186192
metricKey = rpcCountPrefix + method;
187193
metricVal = METRICS.getRpcCounters().get(metricKey).getCount();
188194
assertTrue("metric: " + metricKey + " val: " + metricVal, metricVal >= loop);
195+
189196
metricKey = rpcFailureCountPrefix + method;
190197
counter = METRICS.getRpcCounters().get(metricKey);
191198
metricVal = (counter != null) ? counter.getCount() : 0;
192-
assertTrue("metric: " + metricKey + " val: " + metricVal, metricVal == 0);
199+
if (method.equals("Get") || method.equals("Mutate")) {
200+
// no failure
201+
assertTrue("metric: " + metricKey + " val: " + metricVal, metricVal == 0);
202+
} else {
203+
// has failure
204+
assertTrue("metric: " + metricKey + " val: " + metricVal, metricVal == loop);
205+
}
193206
}
194-
metricKey = rpcFailureCountPrefix + "Multi";
207+
208+
// remote exception
209+
metricKey = "rpcRemoteExceptions_IOException";
210+
counter = METRICS.getRpcCounters().get(metricKey);
211+
metricVal = (counter != null) ? counter.getCount() : 0;
212+
assertTrue("metric: " + metricKey + " val: " + metricVal, metricVal == loop);
213+
214+
// local exception
215+
metricKey = "rpcLocalExceptions_CallTimeoutException";
195216
counter = METRICS.getRpcCounters().get(metricKey);
196217
metricVal = (counter != null) ? counter.getCount() : 0;
197218
assertTrue("metric: " + metricKey + " val: " + metricVal, metricVal == loop);
219+
220+
// total exception
221+
metricKey = "rpcTotalExceptions";
222+
counter = METRICS.getRpcCounters().get(metricKey);
223+
metricVal = (counter != null) ? counter.getCount() : 0;
224+
assertTrue("metric: " + metricKey + " val: " + metricVal, metricVal == loop * 2);
225+
198226
for (MetricsConnection.CallTracker t : new MetricsConnection.CallTracker[] {
199227
METRICS.getGetTracker(), METRICS.getScanTracker(), METRICS.getMultiTracker(),
200228
METRICS.getAppendTracker(), METRICS.getDeleteTracker(), METRICS.getIncrementTracker(),

0 commit comments

Comments
 (0)