@@ -620,25 +620,76 @@ template <class ElemTy> struct ConcurrentReadableHashMap {
620620 /// is stored inline. We work around this contradiction by considering the
621621 /// first index to always be occupied with a value that never matches any key.
622622 struct IndexStorage {
623+ using RawType = uintptr_t ;
624+
625+ RawType Value;
626+
627+ static constexpr uintptr_t log2 (uintptr_t x) {
628+ return x <= 1 ? 0 : log2 (x >> 1 ) + 1 ;
629+ }
630+
631+ static constexpr uintptr_t InlineIndexBits = 4 ;
632+ static constexpr uintptr_t InlineIndexMask = 0xF ;
633+ static constexpr uintptr_t InlineCapacity =
634+ sizeof (RawType) * CHAR_BIT / InlineIndexBits;
635+ static constexpr uintptr_t InlineCapacityLog2 = log2(InlineCapacity);
636+
637+ // Indices can be stored in different ways, depending on how big they need
638+ // to be. The index mode is stored in the bottom two bits of Value. The
639+ // meaning of the rest of Value depends on the mode.
640+ enum class IndexMode {
641+ // Value is treated as an array of four-bit integers, storing the indices.
642+ // The first element overlaps with the mode, and is never used.
643+ Inline,
644+
645+ // The rest of Value holds a pointer to storage. The first byte of this
646+ // storage holds the log2 of the storage capacity. The storage is treated
647+ // as an array of 8, 16, or 32-bit integers. The first element overlaps
648+ // with the capacity, and is never used.
649+ Array8,
650+ Array16,
651+ Array32,
652+ };
653+
654+ IndexStorage () : Value(0 ) {}
655+ IndexStorage (RawType value) : Value(value) {}
656+ IndexStorage (void *ptr, unsigned indexSize, uint8_t capacityLog2) {
657+ assert (capacityLog2 > InlineCapacityLog2);
658+ IndexMode mode;
659+ switch (indexSize) {
660+ case sizeof (uint8_t ):
661+ mode = IndexMode::Array8;
662+ break ;
663+ case sizeof (uint16_t ):
664+ mode = IndexMode::Array16;
665+ break ;
666+ case sizeof (uint32_t ):
667+ mode = IndexMode::Array32;
668+ break ;
669+ default :
 
670+ swift_unreachable("unknown index size");
671+ }
672+ Value = reinterpret_cast <uintptr_t >(ptr) | static_cast <uintptr_t >(mode);
673+ *reinterpret_cast <uint8_t *>(ptr) = capacityLog2;
674+ }
675+
676+ bool valueIsPointer () { return Value & 3 ; }
677+
678+ void *pointer () {
679+ if (valueIsPointer ())
680+ return (void *)(Value & (RawType)~3 );
681+ return nullptr ;
682+ }
683+
684+ IndexMode indexMode () { return IndexMode (Value & 3 ); }
685+
623686 // Index size is variable based on capacity, either 8, 16, or 32 bits.
624687 //
625688 // This is somewhat conservative. We could have, for example, a capacity of
626689 // 512 but a maximum index of only 200, which would still allow for 8-bit
627690 // indices. However, taking advantage of this would require reallocating
628691 // the index storage when the element count crossed a threshold, which is
629692 // more complex, and the advantages are minimal. This keeps it simple.
630- //
631- // The first byte of the storage is the log 2 of the capacity. The remaining
632- // storage is then an array of 8, 16, or 32 bit integers, depending on the
633- // capacity number. This union allows us to access the capacity, and then
634- // access the rest of the storage by taking the address of one of the
635- // IndexZero members and indexing into it (always avoiding index 0).
636- union {
637- uint8_t CapacityLog2;
638- std::atomic<uint8_t > IndexZero8;
639- std::atomic<uint16_t > IndexZero16;
640- std::atomic<uint32_t > IndexZero32;
641- };
642693
643694 // Get the size, in bytes, of the index needed for the given capacity.
644695 static unsigned indexSize (uint8_t capacityLog2) {
@@ -649,46 +700,66 @@ template <class ElemTy> struct ConcurrentReadableHashMap {
649700 return sizeof (uint32_t );
650701 }
651702
652- unsigned indexSize () { return indexSize (CapacityLog2); }
703+ uint8_t getCapacityLog2 () {
704+ if (auto *ptr = pointer ())
705+ return *reinterpret_cast <uint8_t *>(ptr);
706+ return InlineCapacityLog2;
707+ }
653708
654- static IndexStorage * allocate (size_t capacityLog2) {
709+ static IndexStorage allocate (size_t capacityLog2) {
655710 assert (capacityLog2 > 0 );
656711 size_t capacity = 1UL << capacityLog2;
657- auto *ptr = reinterpret_cast <IndexStorage *>(
658- calloc (capacity, indexSize (capacityLog2)) );
712+ unsigned size = indexSize (capacityLog2);
713+ auto *ptr = calloc (capacity, size );
659714 if (!ptr)
660715 swift::crash (" Could not allocate memory." );
661- ptr->CapacityLog2 = capacityLog2;
662- return ptr;
716+ return IndexStorage (ptr, size, capacityLog2);
663717 }
664718
665719 unsigned loadIndexAt (size_t i, std::memory_order order) {
666720 assert(i > 0 && "index zero is off-limits, used to store capacity");
667-
668- switch (indexSize ()) {
669- case sizeof (uint8_t ):
670- return (&IndexZero8)[i].load (order);
671- case sizeof (uint16_t ):
672- return (&IndexZero16)[i].load (order);
673- case sizeof (uint32_t ):
674- return (&IndexZero32)[i].load (order);
675- default :
676- swift_unreachable (" unknown index size" );
721+ assert(i < (1 << getCapacityLog2()) &&
722+ "index is off the end of the indices");
723+
724+ switch (indexMode ()) {
725+ case IndexMode::Inline:
726+ return (Value >> (i * InlineIndexBits)) & InlineIndexMask;
727+ case IndexMode::Array8:
728+ return ((std::atomic<uint8_t > *)pointer ())[i].load (order);
729+ case IndexMode::Array16:
730+ return ((std::atomic<uint16_t > *)pointer ())[i].load (order);
731+ case IndexMode::Array32:
732+ return ((std::atomic<uint32_t > *)pointer ())[i].load (order);
677733 }
678734 }
679735
680- void storeIndexAt (unsigned value, size_t i, std::memory_order order) {
736+ void storeIndexAt (std::atomic<RawType> *inlineStorage, unsigned value,
737+ size_t i, std::memory_order order) {
681738 assert(i > 0 && "index zero is off-limits, used to store capacity");
682-
683- switch (indexSize ()) {
684- case sizeof (uint8_t ):
685- return (&IndexZero8)[i].store (value, order);
686- case sizeof (uint16_t ):
687- return (&IndexZero16)[i].store (value, order);
688- case sizeof (uint32_t ):
689- return (&IndexZero32)[i].store (value, order);
690- default :
691- swift_unreachable (" unknown index size" );
739+ assert(i < (1 << getCapacityLog2()) &&
740+ "index is off the end of the indices");
741+
742+ switch (indexMode ()) {
743+ case IndexMode::Inline: {
744+ assert(value == (value & InlineIndexMask) && "value is too big to fit");
745+ auto shift = i * InlineIndexBits;
746+ assert((Value & (InlineIndexMask << shift)) == 0 &&
747+ "can't overwrite an existing index");
748+ assert(Value == inlineStorage->load(std::memory_order_relaxed) &&
749+ "writing with a stale IndexStorage");
750+ auto newStorage = Value | ((RawType)value << shift);
751+ inlineStorage->store (newStorage, order);
752+ break ;
753+ }
754+ case IndexMode::Array8:
755+ ((std::atomic<uint8_t > *)pointer ())[i].store (value, order);
756+ break ;
757+ case IndexMode::Array16:
758+ ((std::atomic<uint16_t > *)pointer ())[i].store (value, order);
759+ break ;
760+ case IndexMode::Array32:
761+ ((std::atomic<uint32_t > *)pointer ())[i].store (value, order);
762+ break ;
692763 }
693764 }
694765 };
@@ -726,7 +797,11 @@ template <class ElemTy> struct ConcurrentReadableHashMap {
726797 std::atomic<ElemTy *> Elements{nullptr };
727798
728799 /// The array of indices.
729- std::atomic<IndexStorage *> Indices{nullptr };
800+ ///
801+ /// This has to be stored as an IndexStorage::RawType instead of an IndexStorage
802+ /// because some of our targets don't support interesting structs as atomic
803+ /// types. See also MetadataCache::TrackingInfo which uses the same technique.
804+ std::atomic<typename IndexStorage::RawType> Indices{0 };
730805
731806 /// The writer lock, which must be taken before any mutation of the table.
732807 StaticMutex WriterLock;
@@ -778,18 +853,17 @@ template <class ElemTy> struct ConcurrentReadableHashMap {
778853 /// returning the new array with all existing indices copied into it. This
779854 /// operation performs a rehash, so that the indices are in the correct
780855 /// location in the new array.
781- IndexStorage *resize (IndexStorage *indices, uint8_t indicesCapacityLog2,
782- ElemTy *elements) {
783- // Double the size. Start with 16 (fits into 16-byte malloc
784- // bucket), which is 2^4.
785- size_t newCapacityLog2 = indices ? indicesCapacityLog2 + 1 : 4 ;
856+ IndexStorage resize (IndexStorage indices, uint8_t indicesCapacityLog2,
857+ ElemTy *elements) {
858+ // Double the size.
859+ size_t newCapacityLog2 = indicesCapacityLog2 + 1 ;
786860 size_t newMask = (1UL << newCapacityLog2) - 1 ;
787861
788- IndexStorage * newIndices = IndexStorage::allocate (newCapacityLog2);
862+ IndexStorage newIndices = IndexStorage::allocate (newCapacityLog2);
789863
790864 size_t indicesCount = 1UL << indicesCapacityLog2;
791865 for (size_t i = 1 ; i < indicesCount; i++) {
792- unsigned index = indices-> loadIndexAt (i, std::memory_order_relaxed);
866+ unsigned index = indices. loadIndexAt (i, std::memory_order_relaxed);
793867 if (index == 0 )
794868 continue ;
795869
@@ -799,15 +873,16 @@ template <class ElemTy> struct ConcurrentReadableHashMap {
799873 size_t newI = hash & newMask;
800874 // Index 0 is unusable (occupied by the capacity), so always skip it.
801875 while (newI == 0 ||
802- newIndices-> loadIndexAt (newI, std::memory_order_relaxed) != 0 ) {
876+ newIndices. loadIndexAt (newI, std::memory_order_relaxed) != 0 ) {
803877 newI = (newI + 1 ) & newMask;
804878 }
805- newIndices-> storeIndexAt (index, newI, std::memory_order_relaxed);
879+ newIndices. storeIndexAt (nullptr , index, newI, std::memory_order_relaxed);
806880 }
807881
808- Indices.store (newIndices, std::memory_order_release);
882+ Indices.store (newIndices. Value , std::memory_order_release);
809883
810- FreeListNode::add (&FreeList, indices);
884+ if (auto *ptr = indices.pointer ())
885+ FreeListNode::add (&FreeList, ptr);
811886
812887 return newIndices;
813888 }
@@ -818,20 +893,18 @@ template <class ElemTy> struct ConcurrentReadableHashMap {
818893 // / of the new element would be stored.
819894 template <class KeyTy >
820895 static std::pair<ElemTy *, unsigned >
821- find (const KeyTy &key, IndexStorage * indices, size_t elementCount,
896+ find (const KeyTy &key, IndexStorage indices, size_t elementCount,
822897 ElemTy *elements) {
823- if (!indices)
824- return {nullptr , 0 };
825898 auto hash = hash_value (key);
826- auto indicesMask = (1UL << indices-> CapacityLog2 ) - 1 ;
899+ auto indicesMask = (1UL << indices. getCapacityLog2 () ) - 1 ;
827900
828901 auto i = hash & indicesMask;
829902 while (true ) {
830903 // Index 0 is used for the mask and is not actually an index.
831904 if (i == 0 )
832905 i++;
833906
834- auto index = indices-> loadIndexAt (i, std::memory_order_acquire);
907+ auto index = indices. loadIndexAt (i, std::memory_order_acquire);
835908 // Element indices are 1-based, 0 means no entry.
836909 if (index == 0 )
837910 return {nullptr , i};
@@ -864,12 +937,12 @@ template <class ElemTy> struct ConcurrentReadableHashMap {
864937 /// Readers take a snapshot of the hash map, then work with the snapshot.
865938 class Snapshot {
866939 ConcurrentReadableHashMap *Map;
867- IndexStorage * Indices;
940+ IndexStorage Indices;
868941 ElemTy *Elements;
869942 size_t ElementCount;
870943
871944 public:
872- Snapshot (ConcurrentReadableHashMap *map, IndexStorage * indices,
945+ Snapshot (ConcurrentReadableHashMap *map, IndexStorage indices,
873946 ElemTy *elements, size_t elementCount)
874947 : Map(map), Indices(indices), Elements(elements),
875948 ElementCount (elementCount) {}
@@ -885,7 +958,7 @@ template <class ElemTy> struct ConcurrentReadableHashMap {
885958 /// Search for an element matching the given key. Returns a pointer to the
886959 /// found element, or nullptr if no matching element exists.
887960 template <class KeyTy > const ElemTy *find (const KeyTy &key) {
888- if (!Indices || !ElementCount || !Elements)
961+ if (!Indices. Value || !ElementCount || !Elements)
889962 return nullptr ;
890963 return ConcurrentReadableHashMap::find (key, Indices, ElementCount,
891964 Elements)
@@ -917,7 +990,7 @@ template <class ElemTy> struct ConcurrentReadableHashMap {
917990 // pointer can just mean a concurrent insert that triggered a resize of the
918991 // elements array. This is harmless aside from a small performance hit, and
919992 // should not happen often.
920- IndexStorage * indices;
993+ IndexStorage indices;
921994 size_t elementCount;
922995 ElemTy *elements;
923996 ElemTy *elements2;
@@ -951,11 +1024,8 @@ template <class ElemTy> struct ConcurrentReadableHashMap {
9511024 void getOrInsert (KeyTy key, const Call &call) {
9521025 StaticScopedLock guard (WriterLock);
9531026
954- auto *indices = Indices.load (std::memory_order_relaxed);
955- if (!indices)
956- indices = resize (indices, 0 , nullptr );
957-
958- auto indicesCapacityLog2 = indices->CapacityLog2 ;
1027+ auto indices = IndexStorage{Indices.load (std::memory_order_relaxed)};
1028+ auto indicesCapacityLog2 = indices.getCapacityLog2 ();
9591029 auto elementCount = ElementCount.load (std::memory_order_relaxed);
9601030 auto *elements = Elements.load (std::memory_order_relaxed);
9611031
@@ -990,8 +1060,8 @@ template <class ElemTy> struct ConcurrentReadableHashMap {
9901060 assert (hash_value (key) == hash_value (*element) &&
9911061 " Element must have the same hash code as its key." );
9921062 ElementCount.store (elementCount + 1 , std::memory_order_release);
993- indices-> storeIndexAt (elementCount + 1 , found.second ,
994- std::memory_order_release);
1063+ indices. storeIndexAt (&Indices, elementCount + 1 , found.second ,
1064+ std::memory_order_release);
9951065 }
9961066
9971067 deallocateFreeListIfSafe ();
@@ -1002,17 +1072,18 @@ template <class ElemTy> struct ConcurrentReadableHashMap {
10021072 void clear () {
10031073 StaticScopedLock guard (WriterLock);
10041074
1005- auto * indices = Indices.load (std::memory_order_relaxed);
1075+ IndexStorage indices = Indices.load (std::memory_order_relaxed);
10061076 auto *elements = Elements.load (std::memory_order_relaxed);
10071077
10081078 // Order doesn't matter here, snapshots will gracefully handle any field
10091079 // being NULL/0 while the others are not.
1010- Indices.store (nullptr , std::memory_order_relaxed);
1080+ Indices.store (0 , std::memory_order_relaxed);
10111081 ElementCount.store (0 , std::memory_order_relaxed);
10121082 Elements.store (nullptr , std::memory_order_relaxed);
10131083 ElementCapacity = 0 ;
10141084
1015- FreeListNode::add (&FreeList, indices);
1085+ if (auto *ptr = indices.pointer ())
1086+ FreeListNode::add (&FreeList, ptr);
10161087 FreeListNode::add (&FreeList, elements);
10171088
10181089 deallocateFreeListIfSafe ();
0 commit comments