3838 * This is backed by a power-of-2-sized hash table, using quadratic probing with triangular numbers,
3939 * which is guaranteed to exhaust the space.
4040 * <p>
41- * The map can support up to 2^31 keys because we use 32 bit MurmurHash. If the key cardinality is
42- * higher than this, you should probably be using sorting instead of hashing for better cache
43- * locality.
41+ * The map can support up to 2^30 keys. If the key cardinality is higher than this, you should
42+ * probably be using sorting instead of hashing for better cache locality.
4443 * <p>
4544 * This class is not thread safe.
4645 */
@@ -81,6 +80,12 @@ public final class BytesToBytesMap {
8180 */
8281 private static final long PAGE_SIZE_BYTES = 1L << 26 ; // 64 megabytes
8382
83+ /**
84+ * The maximum number of keys that BytesToBytesMap supports.
85+ */
86+ @ VisibleForTesting
87+ static final int MAX_CAPACITY = (1 << 30 );
88+
8489 // This choice of page table size and page size means that we can address up to 500 gigabytes
8590 // of memory.
8691
@@ -150,6 +155,13 @@ public BytesToBytesMap(
150155 this .loadFactor = loadFactor ;
151156 this .loc = new Location ();
152157 this .enablePerfMetrics = enablePerfMetrics ;
158+ if (initialCapacity <= 0 ) {
159+ throw new IllegalArgumentException ("Initial capacity must be greater than 0" );
160+ }
161+ if (initialCapacity > MAX_CAPACITY ) {
162+ throw new IllegalArgumentException (
163+ "Initial capacity " + initialCapacity + " exceeds maximum capacity of " + MAX_CAPACITY );
164+ }
153165 allocate (initialCapacity );
154166 }
155167
@@ -417,6 +429,9 @@ public void putNewKey(
417429 isDefined = true ;
418430 assert (keyLengthBytes % 8 == 0 );
419431 assert (valueLengthBytes % 8 == 0 );
432+ if (size == MAX_CAPACITY ) {
433+ throw new IllegalStateException ("BytesToBytesMap has reached maximum capacity" );
434+ }
420435 // Here, we'll copy the data into our data pages. Because we only store a relative offset from
421436 // the key address instead of storing the absolute address of the value, the key and value
422437 // must be stored in the same memory page.
@@ -468,7 +483,7 @@ public void putNewKey(
468483 longArray .set (pos * 2 + 1 , keyHashcode );
469484 updateAddressesAndSizes (storedKeyAddress );
470485 isDefined = true ;
471- if (size > growthThreshold ) {
486+ if (size > growthThreshold && size < MAX_CAPACITY ) {
472487 growAndRehash ();
473488 }
474489 }
@@ -481,7 +496,9 @@ public void putNewKey(
481496 * @param capacity the new map capacity
482497 */
483498 private void allocate (int capacity ) {
484- capacity = Math .max ((int ) Math .min (Integer .MAX_VALUE , nextPowerOf2 (capacity )), 64 );
499+ assert (capacity >= 0 );
500+ // The capacity needs to be divisible by 64 so that our bit set can be sized properly
501+ capacity = Math .max ((int ) Math .min (MAX_CAPACITY , nextPowerOf2 (capacity )), 64 );
485502 longArray = new LongArray (memoryManager .allocate (capacity * 8 * 2 ));
486503 bitset = new BitSet (MemoryBlock .fromLongArray (new long [capacity / 64 ]));
487504
@@ -556,7 +573,8 @@ int getNumDataPages() {
556573 /**
557574 * Grows the size of the hash table and re-hash everything.
558575 */
559- private void growAndRehash () {
576+ @ VisibleForTesting
577+ void growAndRehash () {
560578 long resizeStartTime = -1 ;
561579 if (enablePerfMetrics ) {
562580 resizeStartTime = System .nanoTime ();
@@ -567,7 +585,7 @@ private void growAndRehash() {
567585 final int oldCapacity = (int ) oldBitSet .capacity ();
568586
569587 // Allocate the new data structures
570- allocate (Math .min (Integer . MAX_VALUE , growthStrategy .nextCapacity (oldCapacity )));
588+ allocate (Math .min (growthStrategy .nextCapacity (oldCapacity ), MAX_CAPACITY ));
571589
572590 // Re-mask (we don't recompute the hashcode because we stored all 32 bits of it)
573591 for (int pos = oldBitSet .nextSetBit (0 ); pos >= 0 ; pos = oldBitSet .nextSetBit (pos + 1 )) {
0 commit comments