1515 * See the License for the specific language governing permissions and
1616 * limitations under the License.
1717 */
18- package org .apache .hadoop .hbase ;
18+ package org .apache .hadoop .hbase .client ;
19+
20+ import static org .apache .hadoop .hbase .client .MetricsConnection .CLIENT_SIDE_METRICS_ENABLED_KEY ;
1921
2022import java .io .IOException ;
2123import java .net .SocketTimeoutException ;
3335import org .apache .hadoop .hbase .client .Scan ;
3436import org .apache .hadoop .hbase .client .Table ;
3537import org .apache .hadoop .hbase .client .TableDescriptorBuilder ;
38+ import org .apache .hadoop .hbase .HBaseClassTestRule ;
39+ import org .apache .hadoop .hbase .HBaseTestingUtility ;
40+ import org .apache .hadoop .hbase .HConstants ;
41+ import org .apache .hadoop .hbase .MiniHBaseCluster ;
42+ import org .apache .hadoop .hbase .StartMiniClusterOption ;
43+ import org .apache .hadoop .hbase .TableName ;
3644import org .apache .hadoop .hbase .ipc .CallTimeoutException ;
3745import org .apache .hadoop .hbase .regionserver .HRegionServer ;
3846import org .apache .hadoop .hbase .regionserver .RSRpcServices ;
@@ -79,7 +87,8 @@ public class TestClientOperationTimeout {
7987 private static int DELAY_GET ;
8088 private static int DELAY_SCAN ;
8189 private static int DELAY_MUTATE ;
82- private static int DELAY_BATCH_MUTATE ;
90+ private static int DELAY_BATCH ;
91+ private static int DELAY_META_SCAN ;
8392
8493 private static final TableName TABLE_NAME = TableName .valueOf ("Timeout" );
8594 private static final byte [] FAMILY = Bytes .toBytes ("family" );
@@ -113,7 +122,8 @@ public void setUp() throws Exception {
113122 DELAY_GET = 0 ;
114123 DELAY_SCAN = 0 ;
115124 DELAY_MUTATE = 0 ;
116- DELAY_BATCH_MUTATE = 0 ;
125+ DELAY_BATCH = 0 ;
126+ DELAY_META_SCAN = 0 ;
117127 }
118128
119129 @ AfterClass
@@ -162,8 +172,8 @@ public void testPutTimeout() {
162172 * operation takes longer than 'hbase.client.operation.timeout'.
163173 */
164174 @ Test
165- public void testMultiPutsTimeout () {
166- DELAY_BATCH_MUTATE = 600 ;
175+ public void testMultiTimeout () {
176+ DELAY_BATCH = 600 ;
167177 Put put1 = new Put (ROW );
168178 put1 .addColumn (FAMILY , QUALIFIER , VALUE );
169179 Put put2 = new Put (ROW );
@@ -177,6 +187,72 @@ public void testMultiPutsTimeout() {
177187 } catch (Exception e ) {
178188 Assert .assertTrue (e instanceof RetriesExhaustedWithDetailsException );
179189 }
190+
191+ Get get1 = new Get (ROW );
192+ get1 .addColumn (FAMILY , QUALIFIER );
193+ Get get2 = new Get (ROW );
194+ get2 .addColumn (FAMILY , QUALIFIER );
195+
196+ List <Get > gets = new ArrayList <>();
197+ gets .add (get1 );
198+ gets .add (get2 );
199+ try {
200+ TABLE .batch (gets , new Object [2 ]);
201+ Assert .fail ("should not reach here" );
202+ } catch (Exception e ) {
203+ Assert .assertTrue (e instanceof RetriesExhaustedWithDetailsException );
204+ }
205+ }
206+
207+ /**
208+ * Tests that a batch get fails with a RetriesExhaustedWithDetailsException whose causes are all
209+ * {@link org.apache.hadoop.hbase.client.OperationTimeoutExceededException} when the region lookups
210+ * take longer than 'hbase.client.operation.timeout', and the metaCacheNumClearServer metric is unchanged.
211+ */
212+ @ Test
213+ public void testMultiGetMetaTimeout () throws IOException {
214+
215+ Configuration conf = new Configuration (UTIL .getConfiguration ());
216+
217+ // the operation timeout must be lower than the delay from a meta scan to fetch region locations
218+ // of the get requests. Simply increasing the meta scan timeout to greater than the
219+ // HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD will result in SocketTimeoutException on the scans, thus
220+ // avoiding the simulation of load on meta. See: HBASE-27487
221+ conf .setLong (HConstants .HBASE_CLIENT_OPERATION_TIMEOUT , 400 );
222+ conf .setBoolean (CLIENT_SIDE_METRICS_ENABLED_KEY , true );
223+ try (Connection specialConnection = ConnectionFactory .createConnection (conf );
224+ Table specialTable = specialConnection .getTable (TABLE_NAME )) {
225+
226+ MetricsConnection metrics =
227+ ((ConnectionImplementation ) specialConnection ).getConnectionMetrics ();
228+ long metaCacheNumClearServerPreFailure = metrics .metaCacheNumClearServer .getCount ();
229+
230+ DELAY_META_SCAN = 400 ;
231+ List <Get > gets = new ArrayList <>();
232+ // we need to ensure the region look-ups eat up more time than the operation timeout without
233+ // exceeding the scan timeout, so we issue 100 gets with distinct row keys.
234+ for (int i = 0 ; i < 100 ; i ++) {
235+ gets .add (new Get (Bytes .toBytes (i )).addColumn (FAMILY , QUALIFIER ));
236+ }
237+ try {
238+ specialTable .get (gets );
239+ Assert .fail ("should not reach here" );
240+ } catch (Exception e ) {
241+ RetriesExhaustedWithDetailsException expected = (RetriesExhaustedWithDetailsException ) e ;
242+ Assert .assertEquals (100 , expected .getNumExceptions ());
243+
244+ // verify we do not clear the cache in this situation, otherwise we would create a pathological
245+ // feedback loop with multigets. See: HBASE-27487
246+ long metaCacheNumClearServerPostFailure = metrics .metaCacheNumClearServer .getCount ();
247+ Assert .assertEquals (metaCacheNumClearServerPreFailure , metaCacheNumClearServerPostFailure );
248+
249+ for (Throwable cause : expected .getCauses ()) {
250+ Assert .assertTrue (cause instanceof OperationTimeoutExceededException );
251+ }
252+
253+ }
254+ }
255+
180256 }
181257
182258 /**
@@ -241,7 +317,12 @@ public ClientProtos.MutateResponse mutate(RpcController rpcc,
241317 public ClientProtos .ScanResponse scan (RpcController controller ,
242318 ClientProtos .ScanRequest request ) throws ServiceException {
243319 try {
244- Thread .sleep (DELAY_SCAN );
320+ String regionName = Bytes .toString (request .getRegion ().getValue ().toByteArray ());
321+ if (regionName .contains (TableName .META_TABLE_NAME .getNameAsString ())) {
322+ Thread .sleep (DELAY_META_SCAN );
323+ } else {
324+ Thread .sleep (DELAY_SCAN );
325+ }
245326 } catch (InterruptedException e ) {
246327 LOG .error ("Sleep interrupted during scan operation" , e );
247328 }
@@ -252,7 +333,7 @@ public ClientProtos.ScanResponse scan(RpcController controller,
252333 public ClientProtos .MultiResponse multi (RpcController rpcc , ClientProtos .MultiRequest request )
253334 throws ServiceException {
254335 try {
255- Thread .sleep (DELAY_BATCH_MUTATE );
336+ Thread .sleep (DELAY_BATCH );
256337 } catch (InterruptedException e ) {
257338 LOG .error ("Sleep interrupted during multi operation" , e );
258339 }
0 commit comments