diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/PrefixFilter.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/PrefixFilter.java index 711b6d599443..07ed8d630eb1 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/PrefixFilter.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/PrefixFilter.java @@ -18,8 +18,10 @@ package org.apache.hadoop.hbase.filter; import java.util.ArrayList; +import java.util.Arrays; import org.apache.hadoop.hbase.ByteBufferExtendedCell; import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.PrivateCellUtil; import org.apache.hadoop.hbase.exceptions.DeserializationException; import org.apache.hadoop.hbase.util.ByteBufferUtils; import org.apache.hadoop.hbase.util.Bytes; @@ -35,13 +37,31 @@ * Pass results that have same row prefix. */ @InterfaceAudience.Public -public class PrefixFilter extends FilterBase { +public class PrefixFilter extends FilterBase implements HintingFilter { protected byte[] prefix = null; protected boolean passedPrefix = false; protected boolean filterRow = true; + protected boolean provideHint = false; + protected Cell reversedNextCellHint; + protected Cell forwardNextCellHint; public PrefixFilter(final byte[] prefix) { this.prefix = prefix; + // Pre-compute hints at creation to avoid re-computing them several times in the corner + // case where there are a lot of cells between the hint and the first real match. 
+ createCellHints(); + } + + private void createCellHints() { + if (prefix == null) { + return; + } + // On reversed scan hint should be the prefix with last byte incremented + byte[] reversedHintBytes = increaseLastNonMaxByte(this.prefix); + this.reversedNextCellHint = + PrivateCellUtil.createFirstOnRow(reversedHintBytes, 0, (short) reversedHintBytes.length); + // On forward scan hint should be the prefix + this.forwardNextCellHint = PrivateCellUtil.createFirstOnRow(prefix, 0, (short) prefix.length); } public byte[] getPrefix() { @@ -50,12 +70,15 @@ public byte[] getPrefix() { @Override public boolean filterRowKey(Cell firstRowCell) { - if (firstRowCell == null || this.prefix == null) return true; - if (filterAllRemaining()) return true; - int length = firstRowCell.getRowLength(); - if (length < prefix.length) return true; + if (firstRowCell == null || this.prefix == null) { + return true; + } + if (filterAllRemaining()) { + return true; + } + // if the cell is before => return false so that getNextCellHint() is invoked. 
// if they are equal, return false => pass row - // else return true, filter row + // if the cell is after => return true, filter row // if we are passed the prefix, set flag int cmp; if (firstRowCell instanceof ByteBufferExtendedCell) { @@ -70,7 +93,8 @@ public boolean filterRowKey(Cell firstRowCell) { passedPrefix = true; } filterRow = (cmp != 0); - return filterRow; + provideHint = (!isReversed() && cmp < 0) || (isReversed() && cmp > 0); + return passedPrefix; } @Deprecated @@ -81,7 +105,12 @@ public ReturnCode filterKeyValue(final Cell c) { @Override public ReturnCode filterCell(final Cell c) { - if (filterRow) return ReturnCode.NEXT_ROW; + if (provideHint) { + return ReturnCode.SEEK_NEXT_USING_HINT; + } + if (filterRow) { + return ReturnCode.NEXT_ROW; + } return ReturnCode.INCLUDE; } @@ -100,6 +129,33 @@ public boolean filterAllRemaining() { return passedPrefix; } + @Override + public Cell getNextCellHint(Cell cell) { + if (reversed) { + return reversedNextCellHint; + } else { + return forwardNextCellHint; + } + } + + /** + * Returns a copy of {@code bytes} in which the last byte that can be safely incremented is + * increased by one. Bytes equal to 0x7F or 0xFF are skipped: 0xFF is -1 as a signed byte, so + * adding one would wrap to 0x00 and yield a row that sorts before the prefix. If every byte is + * skipped, an unchanged copy is returned. + */ + private byte[] increaseLastNonMaxByte(byte[] bytes) { + byte[] result = Arrays.copyOf(bytes, bytes.length); + for (int i = bytes.length - 1; i >= 0; i--) { + byte b = bytes[i]; + if (b != (byte) 0xFF && b < Byte.MAX_VALUE) { + result[i] = (byte) (b + 1); + break; + } + } + return result; + } + public static Filter createFilterFromArguments(ArrayList filterArguments) { Preconditions.checkArgument(filterArguments.size() == 1, "Expected 1 but got: %s", filterArguments.size()); @@ -111,7 +167,9 @@ public static Filter createFilterFromArguments(ArrayList filterArguments @Override public byte[] toByteArray() { FilterProtos.PrefixFilter.Builder builder = FilterProtos.PrefixFilter.newBuilder(); - if (this.prefix != null) builder.setPrefix(UnsafeByteOperations.unsafeWrap(this.prefix)); + if (this.prefix != null) { + builder.setPrefix(UnsafeByteOperations.unsafeWrap(this.prefix)); + } return builder.build().toByteArray(); } diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterList.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterList.java index f64381a8a22e..4400894b8265 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterList.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterList.java @@ -168,7 +168,9 @@ private void mpOneTest(Filter filterMPONE) throws Exception { assertTrue(filterMPONE.filterRowKey(KeyValueUtil.createFirstOnRow(rowkey))); kv = new KeyValue(rowkey, rowkey, Bytes.toBytes(0), Bytes.toBytes(0)); assertFalse(Filter.ReturnCode.INCLUDE == filterMPONE.filterCell(kv)); - assertFalse(filterMPONE.filterRow()); + // FilterList.filterRow() returns true because previously "z" was filtered out (return true) by + // PrefixFilter.filterRowKey() + assertTrue(filterMPONE.filterRow()); /* We should filter any row */ rowkey = Bytes.toBytes("z"); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestPrefixFilter.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestPrefixFilter.java index 2a23c76bd7fe..34e15cf920bd 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestPrefixFilter.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestPrefixFilter.java @@ -17,9 +17,15 @@ */ package org.apache.hadoop.hbase.filter; -import static org.junit.Assert.*; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.KeyValueUtil; import org.apache.hadoop.hbase.testclassification.FilterTests; import org.apache.hadoop.hbase.testclassification.SmallTests; @@ -40,7 +46,6 @@ public class TestPrefixFilter { 
static final char FIRST_CHAR = 'a'; static final char LAST_CHAR = 'e'; static final String HOST_PREFIX = "org.apache.site-"; - static final byte[] GOOD_BYTES = Bytes.toBytes("abc"); @Before public void setUp() throws Exception { @@ -82,13 +87,182 @@ private void prefixRowTests(Filter filter, boolean lastFilterAllRemaining) throw } String yahooSite = "com.yahoo.www"; byte[] yahooSiteBytes = Bytes.toBytes(yahooSite); - assertTrue("Failed with character " + yahooSite, - filter.filterRowKey(KeyValueUtil.createFirstOnRow(yahooSiteBytes))); - assertEquals(filter.filterAllRemaining(), lastFilterAllRemaining); + KeyValue yahooSiteCell = KeyValueUtil.createFirstOnRow(yahooSiteBytes); + assertFalse("Failed with character " + yahooSite, filter.filterRowKey(yahooSiteCell)); + assertEquals(Filter.ReturnCode.SEEK_NEXT_USING_HINT, filter.filterCell(yahooSiteCell)); + assertEquals(lastFilterAllRemaining, filter.filterAllRemaining()); } private byte[] createRow(final char c) { return Bytes.toBytes(HOST_PREFIX + Character.toString(c)); } + @Test + public void shouldProvideHintWhenKeyBefore() { + byte[] prefix = Bytes.toBytes("gg"); + PrefixFilter filter = new PrefixFilter(prefix); + + KeyValue cell = KeyValueUtil.createFirstOnRow(Bytes.toBytes("aa")); + + // Should include this row so that filterCell() will be invoked. 
+ assertFalse(filter.filterRowKey(cell)); + assertEquals(Filter.ReturnCode.SEEK_NEXT_USING_HINT, filter.filterCell(cell)); + Cell actualCellHint = filter.getNextCellHint(cell); + assertNotNull(actualCellHint); + Cell expectedCellHint = KeyValueUtil.createFirstOnRow(prefix); + assertEquals(expectedCellHint, actualCellHint); + assertFalse(filter.filterAllRemaining()); + assertTrue(filter.filterRow()); + } + + @Test + public void shouldProvideHintWhenKeyBeforeAndShorter() { + byte[] prefix = Bytes.toBytes("gggg"); + PrefixFilter filter = new PrefixFilter(prefix); + + KeyValue cell = KeyValueUtil.createFirstOnRow(Bytes.toBytes("aa")); + + // Should include this row so that filterCell() will be invoked. + assertFalse(filter.filterRowKey(cell)); + assertEquals(Filter.ReturnCode.SEEK_NEXT_USING_HINT, filter.filterCell(cell)); + Cell actualCellHint = filter.getNextCellHint(cell); + assertNotNull(actualCellHint); + Cell expectedCellHint = KeyValueUtil.createFirstOnRow(prefix); + assertEquals(expectedCellHint, actualCellHint); + assertFalse(filter.filterAllRemaining()); + assertTrue(filter.filterRow()); + } + + @Test + public void shouldIncludeWhenKeyMatches() { + PrefixFilter filter = new PrefixFilter(Bytes.toBytes("gg")); + + KeyValue matchingCell = KeyValueUtil.createFirstOnRow(Bytes.toBytes("gg")); + + assertFalse(filter.filterRowKey(matchingCell)); + assertEquals(Filter.ReturnCode.INCLUDE, filter.filterCell(matchingCell)); + assertFalse(filter.filterAllRemaining()); + assertFalse(filter.filterRow()); + } + + @Test + public void shouldReturnNextRowWhenKeyAfter() { + PrefixFilter filter = new PrefixFilter(Bytes.toBytes("gg")); + + KeyValue afterCell = KeyValueUtil.createFirstOnRow(Bytes.toBytes("pp")); + + assertTrue(filter.filterRowKey(afterCell)); + assertEquals(Filter.ReturnCode.NEXT_ROW, filter.filterCell(afterCell)); + assertTrue(filter.filterAllRemaining()); + assertTrue(filter.filterRow()); + } + + @Test + public void shouldProvideHintWhenKeyBeforeReversed() { + 
PrefixFilter filter = new PrefixFilter(Bytes.toBytes("aa")); + filter.setReversed(true); + + KeyValue cell = KeyValueUtil.createFirstOnRow(Bytes.toBytes("x")); + + // Should include this row so that filterCell() will be invoked. + assertFalse(filter.filterRowKey(cell)); + assertEquals(Filter.ReturnCode.SEEK_NEXT_USING_HINT, filter.filterCell(cell)); + Cell actualCellHint = filter.getNextCellHint(cell); + assertNotNull(actualCellHint); + Cell expectedCellHint = KeyValueUtil.createFirstOnRow(Bytes.toBytes("ab")); + assertEquals(expectedCellHint, actualCellHint); + assertFalse(filter.filterAllRemaining()); + assertTrue(filter.filterRow()); + } + + @Test + public void hintShouldIncreaseLastNonMaxByteWhenReversed() { + PrefixFilter filter = new PrefixFilter(new byte[] { 'a', 'a', Byte.MAX_VALUE }); + filter.setReversed(true); + + KeyValue cell = KeyValueUtil.createFirstOnRow(Bytes.toBytes("x")); + + // Should include this row so that filterCell() will be invoked. + assertFalse(filter.filterRowKey(cell)); + assertEquals(Filter.ReturnCode.SEEK_NEXT_USING_HINT, filter.filterCell(cell)); + Cell actualCellHint = filter.getNextCellHint(cell); + assertNotNull(actualCellHint); + Cell expectedCellHint = KeyValueUtil.createFirstOnRow(new byte[] { 'a', 'b', Byte.MAX_VALUE }); + assertEquals(expectedCellHint, actualCellHint); + assertFalse(filter.filterAllRemaining()); + assertTrue(filter.filterRow()); + } + + @Test + public void shouldIncludeWhenKeyMatchesReversed() { + PrefixFilter filter = new PrefixFilter(Bytes.toBytes("aa")); + filter.setReversed(true); + + KeyValue matchingCell = KeyValueUtil.createFirstOnRow(Bytes.toBytes("aa")); + + assertFalse(filter.filterRowKey(matchingCell)); + assertEquals(Filter.ReturnCode.INCLUDE, filter.filterCell(matchingCell)); + assertFalse(filter.filterAllRemaining()); + assertFalse(filter.filterRow()); + } + + @Test + public void shouldReturnNextRowWhenKeyAfterReversed() { + PrefixFilter filter = new PrefixFilter(Bytes.toBytes("dd")); + 
filter.setReversed(true); + + KeyValue cell = KeyValueUtil.createFirstOnRow(Bytes.toBytes("aa")); + + assertTrue(filter.filterRowKey(cell)); + assertEquals(Filter.ReturnCode.NEXT_ROW, filter.filterCell(cell)); + assertTrue(filter.filterAllRemaining()); + assertTrue(filter.filterRow()); + } + + @Test + public void hintShouldNotIncreaseMaxBytesWhenReversed() { + PrefixFilter filter = + new PrefixFilter(new byte[] { Byte.MAX_VALUE, Byte.MAX_VALUE, Byte.MAX_VALUE }); + filter.setReversed(true); + + KeyValue cell = KeyValueUtil.createFirstOnRow(Bytes.toBytes("x")); + + assertTrue(filter.filterRowKey(cell)); + assertEquals(Filter.ReturnCode.NEXT_ROW, filter.filterCell(cell)); + Cell actualCellHint = filter.getNextCellHint(cell); + assertNotNull(actualCellHint); + Cell expectedCellHint = + KeyValueUtil.createFirstOnRow(new byte[] { Byte.MAX_VALUE, Byte.MAX_VALUE, Byte.MAX_VALUE }); + assertEquals(expectedCellHint, actualCellHint); + assertTrue(filter.filterAllRemaining()); + assertTrue(filter.filterRow()); + } + + @Test + public void shouldNotThrowWhenCreatedWithNullPrefix() { + PrefixFilter filter = new PrefixFilter(null); + KeyValue cell = KeyValueUtil.createFirstOnRow(Bytes.toBytes("doesNotMatter")); + + assertNull(filter.getNextCellHint(cell)); + filter.setReversed(true); + assertNull(filter.getNextCellHint(cell)); + } + + @Test + public void shouldNotThrowWhenCreatedWithEmptyByteArrayPrefix() { + byte[] emptyPrefix = {}; + KeyValue emptyPrefixCell = KeyValueUtil.createFirstOnRow(emptyPrefix); + KeyValue cell = KeyValueUtil.createFirstOnRow(Bytes.toBytes("doesNotMatter")); + + PrefixFilter filter = new PrefixFilter(emptyPrefix); + + Cell forwardNextCellHint = filter.getNextCellHint(cell); + assertNotNull(forwardNextCellHint); + assertEquals(emptyPrefixCell, forwardNextCellHint); + + filter.setReversed(true); + Cell reverseNextCellHint = filter.getNextCellHint(cell); + assertNotNull(reverseNextCellHint); + assertEquals(emptyPrefixCell, reverseNextCellHint); + } }