Skip to content

Commit 1687018

Browse files
PDavidDávid Paksy
andauthored
HBASE-28621 PrefixFilter should use SEEK_NEXT_USING_HINT (#6361)
Co-authored-by: Dávid Paksy <[email protected]> Signed-off-by: Istvan Toth <[email protected]>
1 parent b7ce297 commit 1687018

File tree

3 files changed

+243
-15
lines changed

3 files changed

+243
-15
lines changed

hbase-client/src/main/java/org/apache/hadoop/hbase/filter/PrefixFilter.java

Lines changed: 61 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,10 @@
1818
package org.apache.hadoop.hbase.filter;
1919

2020
import java.util.ArrayList;
21+
import java.util.Arrays;
2122
import org.apache.hadoop.hbase.ByteBufferExtendedCell;
2223
import org.apache.hadoop.hbase.Cell;
24+
import org.apache.hadoop.hbase.PrivateCellUtil;
2325
import org.apache.hadoop.hbase.exceptions.DeserializationException;
2426
import org.apache.hadoop.hbase.util.ByteBufferUtils;
2527
import org.apache.hadoop.hbase.util.Bytes;
@@ -35,13 +37,31 @@
3537
* Pass results that have same row prefix.
3638
*/
3739
@InterfaceAudience.Public
38-
public class PrefixFilter extends FilterBase {
40+
public class PrefixFilter extends FilterBase implements HintingFilter {
3941
protected byte[] prefix = null;
4042
protected boolean passedPrefix = false;
4143
protected boolean filterRow = true;
44+
protected boolean provideHint = false;
45+
protected Cell reversedNextCellHint;
46+
protected Cell forwardNextCellHint;
4247

4348
/**
 * Creates a filter for the given row-key prefix.
 * @param prefix the row prefix to match; when {@code null} no seek hints are built
 */
public PrefixFilter(final byte[] prefix) {
  this.prefix = prefix;
  // The hints depend only on the prefix, so build them once up front instead of on every
  // hint request; this matters when many cells lie between the hint and the first real match.
  createCellHints();
}
54+
55+
private void createCellHints() {
56+
if (prefix == null) {
57+
return;
58+
}
59+
// On reversed scan hint should be the prefix with last byte incremented
60+
byte[] reversedHintBytes = increaseLastNonMaxByte(this.prefix);
61+
this.reversedNextCellHint =
62+
PrivateCellUtil.createFirstOnRow(reversedHintBytes, 0, (short) reversedHintBytes.length);
63+
// On forward scan hint should be the prefix
64+
this.forwardNextCellHint = PrivateCellUtil.createFirstOnRow(prefix, 0, (short) prefix.length);
4565
}
4666

4767
public byte[] getPrefix() {
@@ -50,12 +70,15 @@ public byte[] getPrefix() {
5070

5171
@Override
5272
public boolean filterRowKey(Cell firstRowCell) {
53-
if (firstRowCell == null || this.prefix == null) return true;
54-
if (filterAllRemaining()) return true;
55-
int length = firstRowCell.getRowLength();
56-
if (length < prefix.length) return true;
73+
if (firstRowCell == null || this.prefix == null) {
74+
return true;
75+
}
76+
if (filterAllRemaining()) {
77+
return true;
78+
}
79+
// if the cell is before => return false so that getNextCellHint() is invoked.
5780
// if they are equal, return false => pass row
58-
// else return true, filter row
81+
// if the cell is after => return true, filter row
5982
// if we are past the prefix, set flag
6083
int cmp;
6184
if (firstRowCell instanceof ByteBufferExtendedCell) {
@@ -70,12 +93,18 @@ public boolean filterRowKey(Cell firstRowCell) {
7093
passedPrefix = true;
7194
}
7295
filterRow = (cmp != 0);
73-
return filterRow;
96+
provideHint = (!isReversed() && cmp < 0) || (isReversed() && cmp > 0);
97+
return passedPrefix;
7498
}
7599

76100
@Override
77101
public ReturnCode filterCell(final Cell c) {
78-
if (filterRow) return ReturnCode.NEXT_ROW;
102+
if (provideHint) {
103+
return ReturnCode.SEEK_NEXT_USING_HINT;
104+
}
105+
if (filterRow) {
106+
return ReturnCode.NEXT_ROW;
107+
}
79108
return ReturnCode.INCLUDE;
80109
}
81110

@@ -94,6 +123,27 @@ public boolean filterAllRemaining() {
94123
return passedPrefix;
95124
}
96125

126+
@Override
127+
public Cell getNextCellHint(Cell cell) {
128+
if (reversed) {
129+
return reversedNextCellHint;
130+
} else {
131+
return forwardNextCellHint;
132+
}
133+
}
134+
135+
private byte[] increaseLastNonMaxByte(byte[] bytes) {
136+
byte[] result = Arrays.copyOf(bytes, bytes.length);
137+
for (int i = bytes.length - 1; i >= 0; i--) {
138+
byte b = bytes[i];
139+
if (b < Byte.MAX_VALUE) {
140+
result[i] = (byte) (b + 1);
141+
break;
142+
}
143+
}
144+
return result;
145+
}
146+
97147
public static Filter createFilterFromArguments(ArrayList<byte[]> filterArguments) {
98148
Preconditions.checkArgument(filterArguments.size() == 1, "Expected 1 but got: %s",
99149
filterArguments.size());
@@ -105,7 +155,9 @@ public static Filter createFilterFromArguments(ArrayList<byte[]> filterArguments
105155
@Override
106156
public byte[] toByteArray() {
107157
FilterProtos.PrefixFilter.Builder builder = FilterProtos.PrefixFilter.newBuilder();
108-
if (this.prefix != null) builder.setPrefix(UnsafeByteOperations.unsafeWrap(this.prefix));
158+
if (this.prefix != null) {
159+
builder.setPrefix(UnsafeByteOperations.unsafeWrap(this.prefix));
160+
}
109161
return builder.build().toByteArray();
110162
}
111163

hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterList.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,9 @@ private void mpOneTest(Filter filterMPONE) throws Exception {
169169
assertTrue(filterMPONE.filterRowKey(KeyValueUtil.createFirstOnRow(rowkey)));
170170
kv = new KeyValue(rowkey, rowkey, Bytes.toBytes(0), Bytes.toBytes(0));
171171
assertFalse(Filter.ReturnCode.INCLUDE == filterMPONE.filterCell(kv));
172-
assertFalse(filterMPONE.filterRow());
172+
// FilterList.filterRow() returns true because previously "z" was filtered out (return true) by
173+
// PrefixFilter.filterRowKey()
174+
assertTrue(filterMPONE.filterRow());
173175

174176
/* We should filter any row */
175177
rowkey = Bytes.toBytes("z");

hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestPrefixFilter.java

Lines changed: 179 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,15 @@
1717
*/
1818
package org.apache.hadoop.hbase.filter;
1919

20-
import static org.junit.Assert.*;
20+
import static org.junit.Assert.assertEquals;
21+
import static org.junit.Assert.assertFalse;
22+
import static org.junit.Assert.assertNotNull;
23+
import static org.junit.Assert.assertNull;
24+
import static org.junit.Assert.assertTrue;
2125

26+
import org.apache.hadoop.hbase.Cell;
2227
import org.apache.hadoop.hbase.HBaseClassTestRule;
28+
import org.apache.hadoop.hbase.KeyValue;
2329
import org.apache.hadoop.hbase.KeyValueUtil;
2430
import org.apache.hadoop.hbase.testclassification.FilterTests;
2531
import org.apache.hadoop.hbase.testclassification.SmallTests;
@@ -40,7 +46,6 @@ public class TestPrefixFilter {
4046
static final char FIRST_CHAR = 'a';
4147
static final char LAST_CHAR = 'e';
4248
static final String HOST_PREFIX = "org.apache.site-";
43-
static final byte[] GOOD_BYTES = Bytes.toBytes("abc");
4449

4550
@Before
4651
public void setUp() throws Exception {
@@ -82,13 +87,182 @@ private void prefixRowTests(Filter filter, boolean lastFilterAllRemaining) throw
8287
}
8388
String yahooSite = "com.yahoo.www";
8489
byte[] yahooSiteBytes = Bytes.toBytes(yahooSite);
85-
assertTrue("Failed with character " + yahooSite,
86-
filter.filterRowKey(KeyValueUtil.createFirstOnRow(yahooSiteBytes)));
87-
assertEquals(filter.filterAllRemaining(), lastFilterAllRemaining);
90+
KeyValue yahooSiteCell = KeyValueUtil.createFirstOnRow(yahooSiteBytes);
91+
assertFalse("Failed with character " + yahooSite, filter.filterRowKey(yahooSiteCell));
92+
assertEquals(Filter.ReturnCode.SEEK_NEXT_USING_HINT, filter.filterCell(yahooSiteCell));
93+
assertEquals(lastFilterAllRemaining, filter.filterAllRemaining());
8894
}
8995

9096
private byte[] createRow(final char c) {
9197
return Bytes.toBytes(HOST_PREFIX + Character.toString(c));
9298
}
9399

100+
@Test
101+
public void shouldProvideHintWhenKeyBefore() {
102+
byte[] prefix = Bytes.toBytes("gg");
103+
PrefixFilter filter = new PrefixFilter(prefix);
104+
105+
KeyValue cell = KeyValueUtil.createFirstOnRow(Bytes.toBytes("aa"));
106+
107+
// Should include this row so that filterCell() will be invoked.
108+
assertFalse(filter.filterRowKey(cell));
109+
assertEquals(Filter.ReturnCode.SEEK_NEXT_USING_HINT, filter.filterCell(cell));
110+
Cell actualCellHint = filter.getNextCellHint(cell);
111+
assertNotNull(actualCellHint);
112+
Cell expectedCellHint = KeyValueUtil.createFirstOnRow(prefix);
113+
assertEquals(expectedCellHint, actualCellHint);
114+
assertFalse(filter.filterAllRemaining());
115+
assertTrue(filter.filterRow());
116+
}
117+
118+
@Test
119+
public void shouldProvideHintWhenKeyBeforeAndShorter() {
120+
byte[] prefix = Bytes.toBytes("gggg");
121+
PrefixFilter filter = new PrefixFilter(prefix);
122+
123+
KeyValue cell = KeyValueUtil.createFirstOnRow(Bytes.toBytes("aa"));
124+
125+
// Should include this row so that filterCell() will be invoked.
126+
assertFalse(filter.filterRowKey(cell));
127+
assertEquals(Filter.ReturnCode.SEEK_NEXT_USING_HINT, filter.filterCell(cell));
128+
Cell actualCellHint = filter.getNextCellHint(cell);
129+
assertNotNull(actualCellHint);
130+
Cell expectedCellHint = KeyValueUtil.createFirstOnRow(prefix);
131+
assertEquals(expectedCellHint, actualCellHint);
132+
assertFalse(filter.filterAllRemaining());
133+
assertTrue(filter.filterRow());
134+
}
135+
136+
@Test
137+
public void shouldIncludeWhenKeyMatches() {
138+
PrefixFilter filter = new PrefixFilter(Bytes.toBytes("gg"));
139+
140+
KeyValue matchingCell = KeyValueUtil.createFirstOnRow(Bytes.toBytes("gg"));
141+
142+
assertFalse(filter.filterRowKey(matchingCell));
143+
assertEquals(Filter.ReturnCode.INCLUDE, filter.filterCell(matchingCell));
144+
assertFalse(filter.filterAllRemaining());
145+
assertFalse(filter.filterRow());
146+
}
147+
148+
@Test
149+
public void shouldReturnNextRowWhenKeyAfter() {
150+
PrefixFilter filter = new PrefixFilter(Bytes.toBytes("gg"));
151+
152+
KeyValue afterCell = KeyValueUtil.createFirstOnRow(Bytes.toBytes("pp"));
153+
154+
assertTrue(filter.filterRowKey(afterCell));
155+
assertEquals(Filter.ReturnCode.NEXT_ROW, filter.filterCell(afterCell));
156+
assertTrue(filter.filterAllRemaining());
157+
assertTrue(filter.filterRow());
158+
}
159+
160+
@Test
161+
public void shouldProvideHintWhenKeyBeforeReversed() {
162+
PrefixFilter filter = new PrefixFilter(Bytes.toBytes("aa"));
163+
filter.setReversed(true);
164+
165+
KeyValue cell = KeyValueUtil.createFirstOnRow(Bytes.toBytes("x"));
166+
167+
// Should include this row so that filterCell() will be invoked.
168+
assertFalse(filter.filterRowKey(cell));
169+
assertEquals(Filter.ReturnCode.SEEK_NEXT_USING_HINT, filter.filterCell(cell));
170+
Cell actualCellHint = filter.getNextCellHint(cell);
171+
assertNotNull(actualCellHint);
172+
Cell expectedCellHint = KeyValueUtil.createFirstOnRow(Bytes.toBytes("ab"));
173+
assertEquals(expectedCellHint, actualCellHint);
174+
assertFalse(filter.filterAllRemaining());
175+
assertTrue(filter.filterRow());
176+
}
177+
178+
@Test
179+
public void hintShouldIncreaseLastNonMaxByteWhenReversed() {
180+
PrefixFilter filter = new PrefixFilter(new byte[] { 'a', 'a', Byte.MAX_VALUE });
181+
filter.setReversed(true);
182+
183+
KeyValue cell = KeyValueUtil.createFirstOnRow(Bytes.toBytes("x"));
184+
185+
// Should include this row so that filterCell() will be invoked.
186+
assertFalse(filter.filterRowKey(cell));
187+
assertEquals(Filter.ReturnCode.SEEK_NEXT_USING_HINT, filter.filterCell(cell));
188+
Cell actualCellHint = filter.getNextCellHint(cell);
189+
assertNotNull(actualCellHint);
190+
Cell expectedCellHint = KeyValueUtil.createFirstOnRow(new byte[] { 'a', 'b', Byte.MAX_VALUE });
191+
assertEquals(expectedCellHint, actualCellHint);
192+
assertFalse(filter.filterAllRemaining());
193+
assertTrue(filter.filterRow());
194+
}
195+
196+
@Test
197+
public void shouldIncludeWhenKeyMatchesReversed() {
198+
PrefixFilter filter = new PrefixFilter(Bytes.toBytes("aa"));
199+
filter.setReversed(true);
200+
201+
KeyValue matchingCell = KeyValueUtil.createFirstOnRow(Bytes.toBytes("aa"));
202+
203+
assertFalse(filter.filterRowKey(matchingCell));
204+
assertEquals(Filter.ReturnCode.INCLUDE, filter.filterCell(matchingCell));
205+
assertFalse(filter.filterAllRemaining());
206+
assertFalse(filter.filterRow());
207+
}
208+
209+
@Test
210+
public void shouldReturnNextRowWhenKeyAfterReversed() {
211+
PrefixFilter filter = new PrefixFilter(Bytes.toBytes("dd"));
212+
filter.setReversed(true);
213+
214+
KeyValue cell = KeyValueUtil.createFirstOnRow(Bytes.toBytes("aa"));
215+
216+
assertTrue(filter.filterRowKey(cell));
217+
assertEquals(Filter.ReturnCode.NEXT_ROW, filter.filterCell(cell));
218+
assertTrue(filter.filterAllRemaining());
219+
assertTrue(filter.filterRow());
220+
}
221+
222+
@Test
223+
public void hintShouldNotIncreaseMaxBytesWhenReversed() {
224+
PrefixFilter filter =
225+
new PrefixFilter(new byte[] { Byte.MAX_VALUE, Byte.MAX_VALUE, Byte.MAX_VALUE });
226+
filter.setReversed(true);
227+
228+
KeyValue cell = KeyValueUtil.createFirstOnRow(Bytes.toBytes("x"));
229+
230+
assertTrue(filter.filterRowKey(cell));
231+
assertEquals(Filter.ReturnCode.NEXT_ROW, filter.filterCell(cell));
232+
Cell actualCellHint = filter.getNextCellHint(cell);
233+
assertNotNull(actualCellHint);
234+
Cell expectedCellHint =
235+
KeyValueUtil.createFirstOnRow(new byte[] { Byte.MAX_VALUE, Byte.MAX_VALUE, Byte.MAX_VALUE });
236+
assertEquals(expectedCellHint, actualCellHint);
237+
assertTrue(filter.filterAllRemaining());
238+
assertTrue(filter.filterRow());
239+
}
240+
241+
@Test
242+
public void shouldNotThrowWhenCreatedWithNullPrefix() {
243+
PrefixFilter filter = new PrefixFilter(null);
244+
KeyValue cell = KeyValueUtil.createFirstOnRow(Bytes.toBytes("doesNotMatter"));
245+
246+
assertNull(filter.getNextCellHint(cell));
247+
filter.setReversed(true);
248+
assertNull(filter.getNextCellHint(cell));
249+
}
250+
251+
@Test
252+
public void shouldNotThrowWhenCreatedWithEmptyByteArrayPrefix() {
253+
byte[] emptyPrefix = {};
254+
KeyValue emptyPrefixCell = KeyValueUtil.createFirstOnRow(emptyPrefix);
255+
KeyValue cell = KeyValueUtil.createFirstOnRow(Bytes.toBytes("doesNotMatter"));
256+
257+
PrefixFilter filter = new PrefixFilter(emptyPrefix);
258+
259+
Cell forwardNextCellHint = filter.getNextCellHint(cell);
260+
assertNotNull(forwardNextCellHint);
261+
assertEquals(emptyPrefixCell, forwardNextCellHint);
262+
263+
filter.setReversed(true);
264+
Cell reverseNextCellHint = filter.getNextCellHint(cell);
265+
assertNotNull(reverseNextCellHint);
266+
assertEquals(emptyPrefixCell, reverseNextCellHint);
267+
}
94268
}

0 commit comments

Comments
 (0)