Skip to content

Commit 1b27124

Browse files
authored
HBASE-26304 Reflect out of band locality improvements in metrics and balancer (#3803)
Signed-off-by: Duo Zhang <[email protected]>
1 parent 33287ac commit 1b27124

File tree

10 files changed

+615
-12
lines changed

10 files changed

+615
-12
lines changed

hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionHDFSBlockLocationFinder.java

Lines changed: 64 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@
3232
import org.apache.hadoop.conf.Configured;
3333
import org.apache.hadoop.hbase.ClusterMetrics;
3434
import org.apache.hadoop.hbase.HDFSBlocksDistribution;
35+
import org.apache.hadoop.hbase.RegionMetrics;
36+
import org.apache.hadoop.hbase.ServerMetrics;
3537
import org.apache.hadoop.hbase.ServerName;
3638
import org.apache.hadoop.hbase.TableName;
3739
import org.apache.hadoop.hbase.client.RegionInfo;
@@ -40,7 +42,6 @@
4042
import org.apache.yetus.audience.InterfaceAudience;
4143
import org.slf4j.Logger;
4244
import org.slf4j.LoggerFactory;
43-
4445
import org.apache.hbase.thirdparty.com.google.common.cache.CacheBuilder;
4546
import org.apache.hbase.thirdparty.com.google.common.cache.CacheLoader;
4647
import org.apache.hbase.thirdparty.com.google.common.cache.LoadingCache;
@@ -58,6 +59,7 @@
5859
class RegionHDFSBlockLocationFinder extends Configured {
5960
private static final Logger LOG = LoggerFactory.getLogger(RegionHDFSBlockLocationFinder.class);
6061
private static final long CACHE_TIME = 240 * 60 * 1000;
62+
private static final float EPSILON = 0.0001f;
6163
private static final HDFSBlocksDistribution EMPTY_BLOCK_DISTRIBUTION =
6264
new HDFSBlocksDistribution();
6365
private volatile ClusterMetrics status;
@@ -110,12 +112,70 @@ void setClusterInfoProvider(ClusterInfoProvider provider) {
110112

111113
void setClusterMetrics(ClusterMetrics status) {
112114
long currentTime = EnvironmentEdgeManager.currentTime();
113-
this.status = status;
115+
114116
if (currentTime > lastFullRefresh + (CACHE_TIME / 2)) {
117+
this.status = status;
115118
// Only count the refresh if it includes user tables ( eg more than meta and namespace ).
116119
lastFullRefresh = scheduleFullRefresh() ? currentTime : lastFullRefresh;
120+
} else {
121+
refreshLocalityChangedRegions(this.status, status);
122+
this.status = status;
123+
}
124+
}
125+
126+
/**
127+
* If locality for a region has changed, that almost certainly means our cache is out of date.
128+
* Compare oldStatus and newStatus, refreshing any regions which have moved or changed locality.
129+
*/
130+
private void refreshLocalityChangedRegions(ClusterMetrics oldStatus, ClusterMetrics newStatus) {
131+
if (oldStatus == null || newStatus == null) {
132+
LOG.debug("Skipping locality-based refresh due to oldStatus={}, newStatus={}",
133+
oldStatus, newStatus);
134+
return;
135+
}
136+
137+
Map<ServerName, ServerMetrics> oldServers = oldStatus.getLiveServerMetrics();
138+
Map<ServerName, ServerMetrics> newServers = newStatus.getLiveServerMetrics();
139+
140+
Map<String, RegionInfo> regionsByName = new HashMap<>(cache.asMap().size());
141+
for (RegionInfo regionInfo : cache.asMap().keySet()) {
142+
regionsByName.put(regionInfo.getEncodedName(), regionInfo);
143+
}
144+
145+
for (Map.Entry<ServerName, ServerMetrics> serverEntry : newServers.entrySet()) {
146+
Map<byte[], RegionMetrics> newRegions = serverEntry.getValue().getRegionMetrics();
147+
for (Map.Entry<byte[], RegionMetrics> regionEntry : newRegions.entrySet()) {
148+
String encodedName = RegionInfo.encodeRegionName(regionEntry.getKey());
149+
RegionInfo region = regionsByName.get(encodedName);
150+
if (region == null) {
151+
continue;
152+
}
153+
154+
float newLocality = regionEntry.getValue().getDataLocality();
155+
float oldLocality = getOldLocality(serverEntry.getKey(), regionEntry.getKey(), oldServers);
156+
157+
if (Math.abs(newLocality - oldLocality) > EPSILON) {
158+
LOG.debug("Locality for region {} changed from {} to {}, refreshing cache",
159+
region.getEncodedName(), oldLocality, newLocality);
160+
cache.refresh(region);
161+
}
162+
}
163+
164+
}
165+
}
166+
167+
private float getOldLocality(ServerName newServer, byte[] regionName,
168+
Map<ServerName, ServerMetrics> oldServers) {
169+
ServerMetrics serverMetrics = oldServers.get(newServer);
170+
if (serverMetrics == null) {
171+
return -1f;
172+
}
173+
RegionMetrics regionMetrics = serverMetrics.getRegionMetrics().get(regionName);
174+
if (regionMetrics == null) {
175+
return -1f;
117176
}
118177

178+
return regionMetrics.getDataLocality();
119179
}
120180

121181
/**
@@ -159,7 +219,7 @@ private HDFSBlocksDistribution internalGetTopBlockLocation(RegionInfo region) {
159219
return blocksDistribution;
160220
}
161221
} catch (IOException ioe) {
162-
LOG.warn("IOException during HDFSBlocksDistribution computation. for " + "region = " +
222+
LOG.warn("IOException during HDFSBlocksDistribution computation for region = {}",
163223
region.getEncodedName(), ioe);
164224
}
165225

@@ -263,7 +323,7 @@ void refreshAndWait(Collection<RegionInfo> hris) {
263323
} catch (InterruptedException ite) {
264324
Thread.currentThread().interrupt();
265325
} catch (ExecutionException ee) {
266-
LOG.debug("ExecutionException during HDFSBlocksDistribution computation. for region = " +
326+
LOG.debug("ExecutionException during HDFSBlocksDistribution computation for region = {}",
267327
hregionInfo.getEncodedName(), ee);
268328
}
269329
index++;

hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/TestRegionHDFSBlockLocationFinder.java

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
import static org.junit.Assert.assertEquals;
2121
import static org.junit.Assert.assertNotNull;
22+
import static org.junit.Assert.assertNotSame;
2223
import static org.junit.Assert.assertNull;
2324
import static org.junit.Assert.assertSame;
2425
import static org.junit.Assert.assertTrue;
@@ -31,12 +32,14 @@
3132
import java.util.List;
3233
import java.util.Map;
3334
import java.util.Random;
35+
import java.util.TreeMap;
3436
import org.apache.hadoop.conf.Configuration;
3537
import org.apache.hadoop.hbase.ClusterMetrics;
3638
import org.apache.hadoop.hbase.HBaseClassTestRule;
3739
import org.apache.hadoop.hbase.HConstants;
3840
import org.apache.hadoop.hbase.HDFSBlocksDistribution;
3941
import org.apache.hadoop.hbase.HDFSBlocksDistribution.HostAndWeight;
42+
import org.apache.hadoop.hbase.RegionMetrics;
4043
import org.apache.hadoop.hbase.ServerMetrics;
4144
import org.apache.hadoop.hbase.ServerName;
4245
import org.apache.hadoop.hbase.TableName;
@@ -204,4 +207,59 @@ public void testGetTopBlockLocations() {
204207
}
205208
}
206209
}
210+
211+
@Test
212+
public void testRefreshRegionsWithChangedLocality() {
213+
ServerName testServer = ServerName.valueOf("host-0", 12345, 12345);
214+
RegionInfo testRegion = REGIONS.get(0);
215+
216+
Map<RegionInfo, HDFSBlocksDistribution> cache = new HashMap<>();
217+
for (RegionInfo region : REGIONS) {
218+
HDFSBlocksDistribution hbd = finder.getBlockDistribution(region);
219+
assertHostAndWeightEquals(generate(region), hbd);
220+
cache.put(region, hbd);
221+
}
222+
223+
finder.setClusterMetrics(getMetricsWithLocality(testServer, testRegion.getRegionName(),
224+
0.123f));
225+
226+
// everything should be cached, because metrics were null before
227+
for (RegionInfo region : REGIONS) {
228+
HDFSBlocksDistribution hbd = finder.getBlockDistribution(region);
229+
assertSame(cache.get(region), hbd);
230+
}
231+
232+
finder.setClusterMetrics(getMetricsWithLocality(testServer, testRegion.getRegionName(),
233+
0.345f));
234+
235+
// locality changed just for our test region, so it should no longer be the same
236+
for (RegionInfo region : REGIONS) {
237+
HDFSBlocksDistribution hbd = finder.getBlockDistribution(region);
238+
if (region.equals(testRegion)) {
239+
assertNotSame(cache.get(region), hbd);
240+
} else {
241+
assertSame(cache.get(region), hbd);
242+
}
243+
}
244+
}
245+
246+
private ClusterMetrics getMetricsWithLocality(ServerName serverName, byte[] region,
247+
float locality) {
248+
RegionMetrics regionMetrics = mock(RegionMetrics.class);
249+
when(regionMetrics.getDataLocality()).thenReturn(locality);
250+
251+
Map<byte[], RegionMetrics> regionMetricsMap = new TreeMap<>(Bytes.BYTES_COMPARATOR);
252+
regionMetricsMap.put(region, regionMetrics);
253+
254+
ServerMetrics serverMetrics = mock(ServerMetrics.class);
255+
when(serverMetrics.getRegionMetrics()).thenReturn(regionMetricsMap);
256+
257+
Map<ServerName, ServerMetrics> serverMetricsMap = new HashMap<>();
258+
serverMetricsMap.put(serverName, serverMetrics);
259+
260+
ClusterMetrics metrics = mock(ClusterMetrics.class);
261+
when(metrics.getLiveServerMetrics()).thenReturn(serverMetricsMap);
262+
263+
return metrics;
264+
}
207265
}

hbase-common/src/main/resources/hbase-default.xml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2042,4 +2042,23 @@ possible configurations would overwhelm and obscure the important.
20422042
the ring buffer is indicated by config: hbase.master.balancer.rejection.queue.size
20432043
</description>
20442044
</property>
2045+
<property>
2046+
<name>hbase.locality.inputstream.derive.enabled</name>
2047+
<value>false</value>
2048+
<description>
2049+
If true, derive StoreFile locality metrics from the underlying DFSInputStream
2050+
backing reads for that StoreFile. This value will update as the DFSInputStream's
2051+
block locations are updated over time. Otherwise, locality is computed on StoreFile
2052+
open, and cached until the StoreFile is closed.
2053+
</description>
2054+
</property>
2055+
<property>
2056+
<name>hbase.locality.inputstream.derive.cache.period</name>
2057+
<value>60000</value>
2058+
<description>
2059+
If deriving StoreFile locality metrics from the underlying DFSInputStream, how
2060+
long the derived values should be cached. The derivation process may involve
2061+
hitting the namenode, if the DFSInputStream's block list is incomplete.
2062+
</description>
2063+
</property>
20452064
</configuration>

hbase-server/src/main/java/org/apache/hadoop/hbase/io/FileLink.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,10 @@ public FileLinkInputStream(final FileSystem fs, final FileLink fileLink, int buf
126126
this.in = tryOpen();
127127
}
128128

129+
private FSDataInputStream getUnderlyingInputStream() {
130+
return in;
131+
}
132+
129133
@Override
130134
public int read() throws IOException {
131135
int res;
@@ -475,6 +479,17 @@ public FSDataInputStream open(final FileSystem fs, int bufferSize) throws IOExce
475479
return new FSDataInputStream(new FileLinkInputStream(fs, this, bufferSize));
476480
}
477481

482+
/**
483+
* If the passed FSDataInputStream is backed by a FileLink, returns the underlying
484+
* InputStream for the resolved link target. Otherwise, returns null.
485+
*/
486+
public static FSDataInputStream getUnderlyingFileLinkInputStream(FSDataInputStream stream) {
487+
if (stream.getWrappedStream() instanceof FileLinkInputStream) {
488+
return ((FileLinkInputStream) stream.getWrappedStream()).getUnderlyingInputStream();
489+
}
490+
return null;
491+
}
492+
478493
/**
479494
* NOTE: This method must be used only in the constructor!
480495
* It creates a List with the specified locations for the link.

hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import java.util.Set;
3030
import java.util.concurrent.atomic.AtomicBoolean;
3131
import org.apache.hadoop.conf.Configuration;
32+
import org.apache.hadoop.fs.FSDataInputStream;
3233
import org.apache.hadoop.fs.FileSystem;
3334
import org.apache.hadoop.fs.Path;
3435
import org.apache.hadoop.hbase.Cell;
@@ -127,6 +128,7 @@ public class HStoreFile implements StoreFile {
127128

128129
// StoreFile.Reader
129130
private volatile StoreFileReader initialReader;
131+
private volatile InputStreamBlockDistribution initialReaderBlockDistribution = null;
130132

131133
// Block cache configuration and reference.
132134
private final CacheConfig cacheConf;
@@ -344,7 +346,11 @@ public OptionalLong getBulkLoadTimestamp() {
344346
* file is opened.
345347
*/
346348
public HDFSBlocksDistribution getHDFSBlockDistribution() {
347-
return this.fileInfo.getHDFSBlockDistribution();
349+
if (initialReaderBlockDistribution != null) {
350+
return initialReaderBlockDistribution.getHDFSBlockDistribution();
351+
} else {
352+
return this.fileInfo.getHDFSBlockDistribution();
353+
}
348354
}
349355

350356
/**
@@ -362,6 +368,13 @@ private void open() throws IOException {
362368
fileInfo.getHFileInfo().initMetaAndIndex(reader.getHFileReader());
363369
}
364370
this.initialReader = fileInfo.postStoreFileReaderOpen(context, cacheConf, reader);
371+
372+
if (InputStreamBlockDistribution.isEnabled(fileInfo.getConf())) {
373+
boolean useHBaseChecksum = context.getInputStreamWrapper().shouldUseHBaseChecksum();
374+
FSDataInputStream stream = context.getInputStreamWrapper().getStream(useHBaseChecksum);
375+
this.initialReaderBlockDistribution = new InputStreamBlockDistribution(stream, fileInfo);
376+
}
377+
365378
// Load up indices and fileinfo. This also loads Bloom filter type.
366379
metadataMap = Collections.unmodifiableMap(initialReader.loadFileInfo());
367380

0 commit comments

Comments
 (0)