Skip to content

Commit dae078e

Browse files
authored
HBASE-28025 Enhance ByteBufferUtils.findCommonPrefix to compare 8 bytes each time (#5354)
Signed-off-by: Duo Zhang <zhangduo@apache.org>
1 parent 8ccb910 commit dae078e

File tree

4 files changed

+329
-37
lines changed

4 files changed

+329
-37
lines changed

hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java

Lines changed: 155 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,14 @@ static abstract class Converter {
8080
abstract int putLong(ByteBuffer buffer, int index, long val);
8181
}
8282

83+
static abstract class CommonPrefixer {
84+
abstract int findCommonPrefix(ByteBuffer left, int leftOffset, int leftLength, byte[] right,
85+
int rightOffset, int rightLength);
86+
87+
abstract int findCommonPrefix(ByteBuffer left, int leftOffset, int leftLength, ByteBuffer right,
88+
int rightOffset, int rightLength);
89+
}
90+
8391
static class ComparerHolder {
8492
static final String UNSAFE_COMPARER_NAME = ComparerHolder.class.getName() + "$UnsafeComparer";
8593

@@ -322,6 +330,111 @@ int putLong(ByteBuffer buffer, int index, long val) {
322330
}
323331
}
324332

333+
static class CommonPrefixerHolder {
334+
static final String UNSAFE_COMMON_PREFIXER_NAME =
335+
CommonPrefixerHolder.class.getName() + "$UnsafeCommonPrefixer";
336+
337+
static final CommonPrefixer BEST_COMMON_PREFIXER = getBestCommonPrefixer();
338+
339+
static CommonPrefixer getBestCommonPrefixer() {
340+
try {
341+
Class<? extends CommonPrefixer> theClass =
342+
Class.forName(UNSAFE_COMMON_PREFIXER_NAME).asSubclass(CommonPrefixer.class);
343+
344+
return theClass.getConstructor().newInstance();
345+
} catch (Throwable t) { // ensure we really catch *everything*
346+
return PureJavaCommonPrefixer.INSTANCE;
347+
}
348+
}
349+
350+
static final class PureJavaCommonPrefixer extends CommonPrefixer {
351+
static final PureJavaCommonPrefixer INSTANCE = new PureJavaCommonPrefixer();
352+
353+
private PureJavaCommonPrefixer() {
354+
}
355+
356+
@Override
357+
public int findCommonPrefix(ByteBuffer left, int leftOffset, int leftLength, byte[] right,
358+
int rightOffset, int rightLength) {
359+
int length = Math.min(leftLength, rightLength);
360+
int result = 0;
361+
362+
while (
363+
result < length
364+
&& ByteBufferUtils.toByte(left, leftOffset + result) == right[rightOffset + result]
365+
) {
366+
result++;
367+
}
368+
369+
return result;
370+
}
371+
372+
@Override
373+
int findCommonPrefix(ByteBuffer left, int leftOffset, int leftLength, ByteBuffer right,
374+
int rightOffset, int rightLength) {
375+
int length = Math.min(leftLength, rightLength);
376+
int result = 0;
377+
378+
while (
379+
result < length && ByteBufferUtils.toByte(left, leftOffset + result)
380+
== ByteBufferUtils.toByte(right, rightOffset + result)
381+
) {
382+
result++;
383+
}
384+
385+
return result;
386+
}
387+
}
388+
389+
static final class UnsafeCommonPrefixer extends CommonPrefixer {
390+
391+
static {
392+
if (!UNSAFE_UNALIGNED) {
393+
throw new Error();
394+
}
395+
}
396+
397+
public UnsafeCommonPrefixer() {
398+
}
399+
400+
@Override
401+
public int findCommonPrefix(ByteBuffer left, int leftOffset, int leftLength, byte[] right,
402+
int rightOffset, int rightLength) {
403+
long offset1Adj;
404+
Object refObj1 = null;
405+
if (left.isDirect()) {
406+
offset1Adj = leftOffset + UnsafeAccess.directBufferAddress(left);
407+
} else {
408+
offset1Adj = leftOffset + left.arrayOffset() + UnsafeAccess.BYTE_ARRAY_BASE_OFFSET;
409+
refObj1 = left.array();
410+
}
411+
return findCommonPrefixUnsafe(refObj1, offset1Adj, leftLength, right,
412+
rightOffset + UnsafeAccess.BYTE_ARRAY_BASE_OFFSET, rightLength);
413+
}
414+
415+
@Override
416+
public int findCommonPrefix(ByteBuffer left, int leftOffset, int leftLength, ByteBuffer right,
417+
int rightOffset, int rightLength) {
418+
long offset1Adj, offset2Adj;
419+
Object refObj1 = null, refObj2 = null;
420+
if (left.isDirect()) {
421+
offset1Adj = leftOffset + UnsafeAccess.directBufferAddress(left);
422+
} else {
423+
offset1Adj = leftOffset + left.arrayOffset() + UnsafeAccess.BYTE_ARRAY_BASE_OFFSET;
424+
refObj1 = left.array();
425+
}
426+
if (right.isDirect()) {
427+
offset2Adj = rightOffset + UnsafeAccess.directBufferAddress(right);
428+
} else {
429+
offset2Adj = rightOffset + right.arrayOffset() + UnsafeAccess.BYTE_ARRAY_BASE_OFFSET;
430+
refObj2 = right.array();
431+
}
432+
return findCommonPrefixUnsafe(refObj1, offset1Adj, leftLength, refObj2, offset2Adj,
433+
rightLength);
434+
}
435+
}
436+
}
437+
325438
/**
326439
* Similar to {@link WritableUtils#writeVLong(java.io.DataOutput, long)}, but writes to a
327440
* {@link ByteBuffer}.
@@ -744,14 +857,7 @@ public static void copyFromBufferToBuffer(ByteBuffer in, ByteBuffer out, int sou
744857
*/
745858
public static int findCommonPrefix(byte[] left, int leftOffset, int leftLength, byte[] right,
746859
int rightOffset, int rightLength) {
747-
int length = Math.min(leftLength, rightLength);
748-
int result = 0;
749-
750-
while (result < length && left[leftOffset + result] == right[rightOffset + result]) {
751-
result++;
752-
}
753-
754-
return result;
860+
return Bytes.findCommonPrefix(left, right, leftLength, rightLength, leftOffset, rightOffset);
755861
}
756862

757863
/**
@@ -765,17 +871,8 @@ public static int findCommonPrefix(byte[] left, int leftOffset, int leftLength,
765871
*/
766872
public static int findCommonPrefix(ByteBuffer left, int leftOffset, int leftLength,
767873
ByteBuffer right, int rightOffset, int rightLength) {
768-
int length = Math.min(leftLength, rightLength);
769-
int result = 0;
770-
771-
while (
772-
result < length && ByteBufferUtils.toByte(left, leftOffset + result)
773-
== ByteBufferUtils.toByte(right, rightOffset + result)
774-
) {
775-
result++;
776-
}
777-
778-
return result;
874+
return CommonPrefixerHolder.BEST_COMMON_PREFIXER.findCommonPrefix(left, leftOffset, leftLength,
875+
right, rightOffset, rightLength);
779876
}
780877

781878
/**
@@ -789,17 +886,8 @@ public static int findCommonPrefix(ByteBuffer left, int leftOffset, int leftLeng
789886
*/
790887
public static int findCommonPrefix(ByteBuffer left, int leftOffset, int leftLength, byte[] right,
791888
int rightOffset, int rightLength) {
792-
int length = Math.min(leftLength, rightLength);
793-
int result = 0;
794-
795-
while (
796-
result < length
797-
&& ByteBufferUtils.toByte(left, leftOffset + result) == right[rightOffset + result]
798-
) {
799-
result++;
800-
}
801-
802-
return result;
889+
return CommonPrefixerHolder.BEST_COMMON_PREFIXER.findCommonPrefix(left, leftOffset, leftLength,
890+
right, rightOffset, rightLength);
803891
}
804892

805893
/**
@@ -972,6 +1060,43 @@ static int compareToUnsafe(Object obj1, long o1, int l1, Object obj2, long o2, i
9721060
return l1 - l2;
9731061
}
9741062

1063+
static int findCommonPrefixUnsafe(Object left, long leftOffset, int leftLength, Object right,
1064+
long rightOffset, int rightLength) {
1065+
final int stride = 8;
1066+
final int minLength = Math.min(leftLength, rightLength);
1067+
int strideLimit = minLength & ~(stride - 1);
1068+
int result = 0;
1069+
int i;
1070+
1071+
for (i = 0; i < strideLimit; i += stride) {
1072+
long lw = HBasePlatformDependent.getLong(left, leftOffset + (long) i);
1073+
long rw = HBasePlatformDependent.getLong(right, rightOffset + (long) i);
1074+
1075+
if (lw != rw) {
1076+
if (!UnsafeAccess.LITTLE_ENDIAN) {
1077+
return result + (Long.numberOfLeadingZeros(lw ^ rw) / Bytes.SIZEOF_LONG);
1078+
} else {
1079+
return result + (Long.numberOfTrailingZeros(lw ^ rw) / Bytes.SIZEOF_LONG);
1080+
}
1081+
} else {
1082+
result += Bytes.SIZEOF_LONG;
1083+
}
1084+
}
1085+
1086+
// The epilogue to cover the last (minLength % stride) elements.
1087+
for (; i < minLength; i++) {
1088+
byte il = HBasePlatformDependent.getByte(left, leftOffset + i);
1089+
byte ir = HBasePlatformDependent.getByte(right, rightOffset + i);
1090+
if (il != ir) {
1091+
return result;
1092+
} else {
1093+
result++;
1094+
}
1095+
}
1096+
1097+
return result;
1098+
}
1099+
9751100
/**
9761101
* Reads a short value at the given buffer's offset.
9771102
* @param buffer input byte buffer to read

hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java

Lines changed: 100 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1179,6 +1179,11 @@ static abstract class Converter {
11791179

11801180
}
11811181

1182+
static abstract class CommonPrefixer {
1183+
abstract int findCommonPrefix(byte[] left, int leftOffset, int leftLength, byte[] right,
1184+
int rightOffset, int rightLength);
1185+
}
1186+
11821187
static Comparer<byte[]> lexicographicalComparerJavaImpl() {
11831188
return LexicographicalComparerHolder.PureJavaComparer.INSTANCE;
11841189
}
@@ -1453,6 +1458,99 @@ public int compareTo(byte[] buffer1, int offset1, int length1, byte[] buffer2, i
14531458
}
14541459
}
14551460

1461+
static class CommonPrefixerHolder {
1462+
static final String UNSAFE_COMMON_PREFIXER_NAME =
1463+
CommonPrefixerHolder.class.getName() + "$UnsafeCommonPrefixer";
1464+
1465+
static final CommonPrefixer BEST_COMMON_PREFIXER = getBestCommonPrefixer();
1466+
1467+
static CommonPrefixer getBestCommonPrefixer() {
1468+
try {
1469+
Class<? extends CommonPrefixer> theClass =
1470+
Class.forName(UNSAFE_COMMON_PREFIXER_NAME).asSubclass(CommonPrefixer.class);
1471+
1472+
return theClass.getConstructor().newInstance();
1473+
} catch (Throwable t) { // ensure we really catch *everything*
1474+
return CommonPrefixerHolder.PureJavaCommonPrefixer.INSTANCE;
1475+
}
1476+
}
1477+
1478+
static final class PureJavaCommonPrefixer extends CommonPrefixer {
1479+
static final PureJavaCommonPrefixer INSTANCE = new PureJavaCommonPrefixer();
1480+
1481+
private PureJavaCommonPrefixer() {
1482+
}
1483+
1484+
@Override
1485+
public int findCommonPrefix(byte[] left, int leftOffset, int leftLength, byte[] right,
1486+
int rightOffset, int rightLength) {
1487+
int length = Math.min(leftLength, rightLength);
1488+
int result = 0;
1489+
1490+
while (result < length && left[leftOffset + result] == right[rightOffset + result]) {
1491+
result++;
1492+
}
1493+
return result;
1494+
}
1495+
}
1496+
1497+
static final class UnsafeCommonPrefixer extends CommonPrefixer {
1498+
1499+
static {
1500+
if (!UNSAFE_UNALIGNED) {
1501+
throw new Error();
1502+
}
1503+
1504+
// sanity check - this should never fail
1505+
if (HBasePlatformDependent.arrayIndexScale(byte[].class) != 1) {
1506+
throw new AssertionError();
1507+
}
1508+
}
1509+
1510+
public UnsafeCommonPrefixer() {
1511+
}
1512+
1513+
@Override
1514+
public int findCommonPrefix(byte[] left, int leftOffset, int leftLength, byte[] right,
1515+
int rightOffset, int rightLength) {
1516+
final int stride = 8;
1517+
final int minLength = Math.min(leftLength, rightLength);
1518+
int strideLimit = minLength & ~(stride - 1);
1519+
final long leftOffsetAdj = leftOffset + UnsafeAccess.BYTE_ARRAY_BASE_OFFSET;
1520+
final long rightOffsetAdj = rightOffset + UnsafeAccess.BYTE_ARRAY_BASE_OFFSET;
1521+
int result = 0;
1522+
int i;
1523+
1524+
for (i = 0; i < strideLimit; i += stride) {
1525+
long lw = HBasePlatformDependent.getLong(left, leftOffsetAdj + i);
1526+
long rw = HBasePlatformDependent.getLong(right, rightOffsetAdj + i);
1527+
if (lw != rw) {
1528+
if (!UnsafeAccess.LITTLE_ENDIAN) {
1529+
return result + (Long.numberOfLeadingZeros(lw ^ rw) / Bytes.SIZEOF_LONG);
1530+
} else {
1531+
return result + (Long.numberOfTrailingZeros(lw ^ rw) / Bytes.SIZEOF_LONG);
1532+
}
1533+
} else {
1534+
result += Bytes.SIZEOF_LONG;
1535+
}
1536+
}
1537+
1538+
// The epilogue to cover the last (minLength % stride) elements.
1539+
for (; i < minLength; i++) {
1540+
int il = (left[leftOffset + i]);
1541+
int ir = (right[rightOffset + i]);
1542+
if (il != ir) {
1543+
return result;
1544+
} else {
1545+
result++;
1546+
}
1547+
}
1548+
1549+
return result;
1550+
}
1551+
}
1552+
}
1553+
14561554
/**
14571555
* Lexicographically determine the equality of two arrays.
14581556
* @param left left operand
@@ -2429,12 +2527,7 @@ public static int searchDelimiterIndexInReverse(final byte[] b, final int offset
24292527

24302528
public static int findCommonPrefix(byte[] left, byte[] right, int leftLength, int rightLength,
24312529
int leftOffset, int rightOffset) {
2432-
int length = Math.min(leftLength, rightLength);
2433-
int result = 0;
2434-
2435-
while (result < length && left[leftOffset + result] == right[rightOffset + result]) {
2436-
result++;
2437-
}
2438-
return result;
2530+
return CommonPrefixerHolder.BEST_COMMON_PREFIXER.findCommonPrefix(left, leftOffset, leftLength,
2531+
right, rightOffset, rightLength);
24392532
}
24402533
}

hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestByteBufferUtils.java

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -606,6 +606,37 @@ public void testEquals() {
606606
assertTrue(ByteBufferUtils.equals(bb, 0, a.length, a, 0, a.length));
607607
}
608608

609+
@Test
610+
public void testFindCommonPrefix() {
611+
ByteBuffer bb1 = ByteBuffer.allocate(135);
612+
ByteBuffer bb2 = ByteBuffer.allocate(135);
613+
ByteBuffer bb3 = ByteBuffer.allocateDirect(135);
614+
byte[] b = new byte[71];
615+
616+
fillBB(bb1, (byte) 5);
617+
fillBB(bb2, (byte) 5);
618+
fillBB(bb3, (byte) 5);
619+
fillArray(b, (byte) 5);
620+
621+
assertEquals(135,
622+
ByteBufferUtils.findCommonPrefix(bb1, 0, bb1.remaining(), bb2, 0, bb2.remaining()));
623+
assertEquals(71, ByteBufferUtils.findCommonPrefix(bb1, 0, bb1.remaining(), b, 0, b.length));
624+
assertEquals(135,
625+
ByteBufferUtils.findCommonPrefix(bb1, 0, bb1.remaining(), bb3, 0, bb3.remaining()));
626+
assertEquals(71, ByteBufferUtils.findCommonPrefix(bb3, 0, bb3.remaining(), b, 0, b.length));
627+
628+
b[13] = 9;
629+
assertEquals(13, ByteBufferUtils.findCommonPrefix(bb1, 0, bb1.remaining(), b, 0, b.length));
630+
631+
bb2.put(134, (byte) 6);
632+
assertEquals(134,
633+
ByteBufferUtils.findCommonPrefix(bb1, 0, bb1.remaining(), bb2, 0, bb2.remaining()));
634+
635+
bb2.put(6, (byte) 4);
636+
assertEquals(6,
637+
ByteBufferUtils.findCommonPrefix(bb1, 0, bb1.remaining(), bb2, 0, bb2.remaining()));
638+
}
639+
609640
private static void fillBB(ByteBuffer bb, byte b) {
610641
for (int i = bb.position(); i < bb.limit(); i++) {
611642
bb.put(i, b);

0 commit comments

Comments
 (0)