@@ -624,25 +624,76 @@ struct ConcurrentReadableHashMap {
   /// is stored inline. We work around this contradiction by considering the
   /// first index to always be occupied with a value that never matches any key.
   struct IndexStorage {
+    using RawType = uintptr_t;
+
+    RawType Value;
+
+    static constexpr uintptr_t log2(uintptr_t x) {
+      return x <= 1 ? 0 : log2(x >> 1) + 1;
+    }
+
+    static constexpr uintptr_t InlineIndexBits = 4;
+    static constexpr uintptr_t InlineIndexMask = 0xF;
+    static constexpr uintptr_t InlineCapacity =
+        sizeof(RawType) * CHAR_BIT / InlineIndexBits;
+    static constexpr uintptr_t InlineCapacityLog2 = log2(InlineCapacity);
+
+    // Indices can be stored in different ways, depending on how big they need
+    // to be. The index mode is stored in the bottom two bits of Value. The
+    // meaning of the rest of Value depends on the mode.
+    enum class IndexMode {
+      // Value is treated as an array of four-bit integers, storing the indices.
+      // The first element overlaps with the mode, and is never used.
+      Inline,
+
+      // The rest of Value holds a pointer to storage. The first byte of this
+      // storage holds the log2 of the storage capacity. The storage is treated
+      // as an array of 8, 16, or 32-bit integers. The first element overlaps
+      // with the capacity, and is never used.
+      Array8,
+      Array16,
+      Array32,
+    };
+
+    IndexStorage() : Value(0) {}
+    IndexStorage(RawType value) : Value(value) {}
+    IndexStorage(void *ptr, unsigned indexSize, uint8_t capacityLog2) {
+      assert(capacityLog2 > InlineCapacityLog2);
+      IndexMode mode;
+      switch (indexSize) {
+      case sizeof(uint8_t):
+        mode = IndexMode::Array8;
+        break;
+      case sizeof(uint16_t):
+        mode = IndexMode::Array16;
+        break;
+      case sizeof(uint32_t):
+        mode = IndexMode::Array32;
+        break;
+      default:
+        swift_unreachable("unknown index size");
+      }
+      Value = reinterpret_cast<uintptr_t>(ptr) | static_cast<uintptr_t>(mode);
+      *reinterpret_cast<uint8_t *>(ptr) = capacityLog2;
+    }
+
+    bool valueIsPointer() { return Value & 3; }
+
+    void *pointer() {
+      if (valueIsPointer())
+        return (void *)(Value & (RawType)~3);
+      return nullptr;
+    }
+
+    IndexMode indexMode() { return IndexMode(Value & 3); }
+
     // Index size is variable based on capacity, either 8, 16, or 32 bits.
     //
     // This is somewhat conservative. We could have, for example, a capacity of
     // 512 but a maximum index of only 200, which would still allow for 8-bit
     // indices. However, taking advantage of this would require reallocating
     // the index storage when the element count crossed a threshold, which is
     // more complex, and the advantages are minimal. This keeps it simple.
-    //
-    // The first byte of the storage is the log 2 of the capacity. The remaining
-    // storage is then an array of 8, 16, or 32 bit integers, depending on the
-    // capacity number. This union allows us to access the capacity, and then
-    // access the rest of the storage by taking the address of one of the
-    // IndexZero members and indexing into it (always avoiding index 0).
-    union {
-      uint8_t CapacityLog2;
-      std::atomic<uint8_t> IndexZero8;
-      std::atomic<uint16_t> IndexZero16;
-      std::atomic<uint32_t> IndexZero32;
-    };
 
     // Get the size, in bytes, of the index needed for the given capacity.
     static unsigned indexSize(uint8_t capacityLog2) {
@@ -653,46 +704,66 @@ struct ConcurrentReadableHashMap {
       return sizeof(uint32_t);
     }
 
-    unsigned indexSize() { return indexSize(CapacityLog2); }
+    uint8_t getCapacityLog2() {
+      if (auto *ptr = pointer())
+        return *reinterpret_cast<uint8_t *>(ptr);
+      return InlineCapacityLog2;
+    }
 
-    static IndexStorage *allocate(size_t capacityLog2) {
+    static IndexStorage allocate(size_t capacityLog2) {
       assert(capacityLog2 > 0);
       size_t capacity = 1UL << capacityLog2;
-      auto *ptr = reinterpret_cast<IndexStorage *>(
-          calloc(capacity, indexSize(capacityLog2)));
+      unsigned size = indexSize(capacityLog2);
+      auto *ptr = calloc(capacity, size);
       if (!ptr)
         swift::crash("Could not allocate memory.");
-      ptr->CapacityLog2 = capacityLog2;
-      return ptr;
+      return IndexStorage(ptr, size, capacityLog2);
     }
 
     unsigned loadIndexAt(size_t i, std::memory_order order) {
       assert(i > 0 && "index zero is off-limits, used to store capacity");
-
-      switch (indexSize()) {
-      case sizeof(uint8_t):
-        return (&IndexZero8)[i].load(order);
-      case sizeof(uint16_t):
-        return (&IndexZero16)[i].load(order);
-      case sizeof(uint32_t):
-        return (&IndexZero32)[i].load(order);
-      default:
-        swift_unreachable("unknown index size");
+      assert(i < (1 << getCapacityLog2()) &&
+             "index is off the end of the indices");
+
+      switch (indexMode()) {
+      case IndexMode::Inline:
+        return (Value >> (i * InlineIndexBits)) & InlineIndexMask;
+      case IndexMode::Array8:
+        return ((std::atomic<uint8_t> *)pointer())[i].load(order);
+      case IndexMode::Array16:
+        return ((std::atomic<uint16_t> *)pointer())[i].load(order);
+      case IndexMode::Array32:
+        return ((std::atomic<uint32_t> *)pointer())[i].load(order);
       }
     }
 
-    void storeIndexAt(unsigned value, size_t i, std::memory_order order) {
+    void storeIndexAt(std::atomic<RawType> *inlineStorage, unsigned value,
+                      size_t i, std::memory_order order) {
       assert(i > 0 && "index zero is off-limits, used to store capacity");
-
-      switch (indexSize()) {
-      case sizeof(uint8_t):
-        return (&IndexZero8)[i].store(value, order);
-      case sizeof(uint16_t):
-        return (&IndexZero16)[i].store(value, order);
-      case sizeof(uint32_t):
-        return (&IndexZero32)[i].store(value, order);
-      default:
-        swift_unreachable("unknown index size");
+      assert(i < (1 << getCapacityLog2()) &&
+             "index is off the end of the indices");
+
+      switch (indexMode()) {
+      case IndexMode::Inline: {
+        assert(value == (value & InlineIndexMask) && "value is too big to fit");
+        auto shift = i * InlineIndexBits;
+        assert((Value & (InlineIndexMask << shift)) == 0 &&
+               "can't overwrite an existing index");
+        assert(Value == inlineStorage->load(std::memory_order_relaxed) &&
+               "writing with a stale IndexStorage");
+        auto newStorage = Value | ((RawType)value << shift);
+        inlineStorage->store(newStorage, order);
+        break;
+      }
+      case IndexMode::Array8:
+        ((std::atomic<uint8_t> *)pointer())[i].store(value, order);
+        break;
+      case IndexMode::Array16:
+        ((std::atomic<uint16_t> *)pointer())[i].store(value, order);
+        break;
+      case IndexMode::Array32:
+        ((std::atomic<uint32_t> *)pointer())[i].store(value, order);
+        break;
       }
     }
   };
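
To make the new encoding concrete: on a 64-bit target, RawType has 64 bits, so InlineCapacity is 64 / 4 = 16 slots and InlineCapacityLog2 is 4. Slot 0 overlaps the two mode bits (00 selects inline mode) and is never used, leaving 15 usable four-bit slots. The following is a minimal standalone sketch of the inline mode; the helper names are hypothetical and not part of the patch:

#include <cassert>
#include <cstdint>
#include <cstdio>

// Standalone sketch of the inline encoding (64-bit target assumed).
using RawType = uintptr_t;
constexpr RawType InlineIndexBits = 4;
constexpr RawType InlineIndexMask = 0xF;

RawType setSlot(RawType value, size_t slot, unsigned index) {
  assert(slot > 0 && "slot 0 overlaps the mode bits");
  assert(index <= InlineIndexMask && "index doesn't fit in four bits");
  return value | ((RawType)index << (slot * InlineIndexBits));
}

unsigned getSlot(RawType value, size_t slot) {
  return (value >> (slot * InlineIndexBits)) & InlineIndexMask;
}

int main() {
  RawType v = 0; // bottom two bits 00: inline mode
  v = setSlot(v, 3, 7);
  v = setSlot(v, 15, 12); // slot 15 exists only on 64-bit targets
  printf("%u %u %u\n", getSlot(v, 3), getSlot(v, 15), getSlot(v, 1));
  // Prints "7 12 0": an untouched slot reads back as 0, meaning "no entry".
}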
@@ -753,7 +824,11 @@ struct ConcurrentReadableHashMap {
   std::atomic<ElementStorage *> Elements{nullptr};
 
   /// The array of indices.
-  std::atomic<IndexStorage *> Indices{nullptr};
+  ///
+  /// This has to be stored as an IndexStorage::RawType instead of an
+  /// IndexStorage because some of our targets don't support interesting
+  /// structs as atomic types. See also MetadataCache::TrackingInfo, which
+  /// uses the same technique.
+  std::atomic<typename IndexStorage::RawType> Indices{0};
 
   /// The writer lock, which must be taken before any mutation of the table.
   MutexTy WriterLock;
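
A minimal sketch of the raw-word technique referenced in that comment, with hypothetical names: the atomic holds a plain integer, and every load rewraps it in the struct type. This sidesteps targets where std::atomic over a nontrivial struct isn't usable:

#include <atomic>
#include <cstdint>

// The tagged word lives in a plain integer atomic and is reconstructed
// into the wrapper type on each load.
struct Tagged {
  uintptr_t Value;
  Tagged() : Value(0) {}
  Tagged(uintptr_t v) : Value(v) {}
  void *pointer() {
    return (Value & 3) ? (void *)(Value & ~(uintptr_t)3) : nullptr;
  }
};

std::atomic<uintptr_t> Word{0};

Tagged loadTagged() {
  // Load the raw integer, then rebuild the struct around it.
  return Tagged{Word.load(std::memory_order_acquire)};
}

void storeTagged(Tagged t) {
  Word.store(t.Value, std::memory_order_release);
}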
@@ -798,18 +873,17 @@ struct ConcurrentReadableHashMap {
   /// returning the new array with all existing indices copied into it. This
   /// operation performs a rehash, so that the indices are in the correct
   /// location in the new array.
-  IndexStorage *resize(IndexStorage *indices, uint8_t indicesCapacityLog2,
-                       ElemTy *elements) {
-    // Double the size. Start with 16 (fits into 16-byte malloc
-    // bucket), which is 2^4.
-    size_t newCapacityLog2 = indices ? indicesCapacityLog2 + 1 : 4;
+  IndexStorage resize(IndexStorage indices, uint8_t indicesCapacityLog2,
+                      ElemTy *elements) {
+    // Double the size.
+    size_t newCapacityLog2 = indicesCapacityLog2 + 1;
     size_t newMask = (1UL << newCapacityLog2) - 1;
 
-    IndexStorage *newIndices = IndexStorage::allocate(newCapacityLog2);
+    IndexStorage newIndices = IndexStorage::allocate(newCapacityLog2);
 
     size_t indicesCount = 1UL << indicesCapacityLog2;
     for (size_t i = 1; i < indicesCount; i++) {
-      unsigned index = indices->loadIndexAt(i, std::memory_order_relaxed);
+      unsigned index = indices.loadIndexAt(i, std::memory_order_relaxed);
       if (index == 0)
         continue;
 
@@ -819,15 +893,16 @@ struct ConcurrentReadableHashMap {
       size_t newI = hash & newMask;
       // Index 0 is unusable (occupied by the capacity), so always skip it.
       while (newI == 0 ||
-             newIndices->loadIndexAt(newI, std::memory_order_relaxed) != 0) {
+             newIndices.loadIndexAt(newI, std::memory_order_relaxed) != 0) {
        newI = (newI + 1) & newMask;
      }
-      newIndices->storeIndexAt(index, newI, std::memory_order_relaxed);
+      newIndices.storeIndexAt(nullptr, index, newI, std::memory_order_relaxed);
    }
 
-    Indices.store(newIndices, std::memory_order_release);
+    Indices.store(newIndices.Value, std::memory_order_release);
 
-    FreeListNode::add(&FreeList, indices);
+    if (auto *ptr = indices.pointer())
+      FreeListNode::add(&FreeList, ptr);
 
     return newIndices;
   }
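
A standalone sketch of the rehash that resize performs, using hypothetical vector-based stand-ins for the real storage: every nonzero slot is re-inserted under the doubled mask by linear probing, and slot 0 is always skipped because the real table keeps its capacity there:

#include <cstddef>
#include <cstdio>
#include <vector>

// Re-insert each nonzero index into a table of twice the capacity.
std::vector<unsigned> rehashDoubled(const std::vector<unsigned> &old,
                                    const std::vector<size_t> &hashes) {
  size_t newMask = old.size() * 2 - 1;
  std::vector<unsigned> grown(old.size() * 2, 0);
  for (size_t i = 1; i < old.size(); i++) {
    unsigned index = old[i];
    if (index == 0)
      continue; // empty slot
    // Element indices are 1-based; hashes[index - 1] stands in for
    // hash_value(element) in the real code.
    size_t newI = hashes[index - 1] & newMask;
    while (newI == 0 || grown[newI] != 0)
      newI = (newI + 1) & newMask;
    grown[newI] = index;
  }
  return grown;
}

int main() {
  std::vector<unsigned> old = {0, 2, 0, 1}; // capacity 4, two entries
  std::vector<size_t> hashes = {7, 5};      // hashes of elements 1 and 2
  auto grown = rehashDoubled(old, hashes);  // capacity 8
  for (unsigned idx : grown)
    printf("%u ", idx); // element 2 lands at 5 & 7 = 5, element 1 at 7 & 7 = 7
  printf("\n");
}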
@@ -838,20 +913,18 @@ struct ConcurrentReadableHashMap {
   /// of the new element would be stored.
   template <class KeyTy>
   static std::pair<ElemTy *, unsigned>
-  find(const KeyTy &key, IndexStorage *indices, size_t elementCount,
+  find(const KeyTy &key, IndexStorage indices, size_t elementCount,
        ElemTy *elements) {
-    if (!indices)
-      return {nullptr, 0};
     auto hash = hash_value(key);
-    auto indicesMask = (1UL << indices->CapacityLog2) - 1;
+    auto indicesMask = (1UL << indices.getCapacityLog2()) - 1;
 
     auto i = hash & indicesMask;
     while (true) {
       // Index 0 is used for the mask and is not actually an index.
       if (i == 0)
         i++;
 
-      auto index = indices->loadIndexAt(i, std::memory_order_acquire);
+      auto index = indices.loadIndexAt(i, std::memory_order_acquire);
       // Element indices are 1-based; 0 means no entry.
       if (index == 0)
         return {nullptr, i};
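
The acquire load in this probe loop pairs with the release store that getOrInsert performs when it publishes a new index. A minimal sketch of that pairing, with hypothetical names rather than the real map types: a reader that observes the nonzero index is guaranteed to also see the element contents written before publication:

#include <atomic>
#include <cassert>
#include <thread>

struct Element { int Payload; };

Element elements[16];
std::atomic<unsigned> slot{0}; // 0 means "no entry", as in the index arrays

void writer() {
  elements[0].Payload = 42;                 // initialize the element first
  slot.store(1, std::memory_order_release); // then publish 1-based index 1
}

void reader() {
  unsigned index = slot.load(std::memory_order_acquire);
  if (index != 0)
    assert(elements[index - 1].Payload == 42); // visible after the acquire
}

int main() {
  std::thread w(writer), r(reader);
  w.join();
  r.join();
}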
@@ -884,12 +957,12 @@ struct ConcurrentReadableHashMap {
   /// Readers take a snapshot of the hash map, then work with the snapshot.
   class Snapshot {
     ConcurrentReadableHashMap *Map;
-    IndexStorage *Indices;
+    IndexStorage Indices;
     ElemTy *Elements;
     size_t ElementCount;
 
   public:
-    Snapshot(ConcurrentReadableHashMap *map, IndexStorage *indices,
+    Snapshot(ConcurrentReadableHashMap *map, IndexStorage indices,
              ElemTy *elements, size_t elementCount)
         : Map(map), Indices(indices), Elements(elements),
           ElementCount(elementCount) {}
@@ -905,7 +978,7 @@ struct ConcurrentReadableHashMap {
     /// Search for an element matching the given key. Returns a pointer to the
     /// found element, or nullptr if no matching element exists.
     template <class KeyTy> const ElemTy *find(const KeyTy &key) {
-      if (!Indices || !ElementCount || !Elements)
+      if (!Indices.Value || !ElementCount || !Elements)
         return nullptr;
       return ConcurrentReadableHashMap::find(key, Indices, ElementCount,
                                              Elements)
@@ -937,7 +1010,7 @@ struct ConcurrentReadableHashMap {
     // pointer can just mean a concurrent insert that triggered a resize of the
     // elements array. This is harmless aside from a small performance hit, and
     // should not happen often.
-    IndexStorage *indices;
+    IndexStorage indices;
     size_t elementCount;
     ElementStorage *elements;
     ElementStorage *elements2;
@@ -972,11 +1045,8 @@ struct ConcurrentReadableHashMap {
   void getOrInsert(KeyTy key, const Call &call) {
     ScopedLockTy guard(WriterLock);
 
-    auto *indices = Indices.load(std::memory_order_relaxed);
-    if (!indices)
-      indices = resize(indices, 0, nullptr);
-
-    auto indicesCapacityLog2 = indices->CapacityLog2;
+    auto indices = IndexStorage{Indices.load(std::memory_order_relaxed)};
+    auto indicesCapacityLog2 = indices.getCapacityLog2();
     auto elementCount = ElementCount.load(std::memory_order_relaxed);
     auto *elements = Elements.load(std::memory_order_relaxed);
     auto *elementsPtr = elements ? elements->data() : nullptr;
@@ -1012,8 +1082,8 @@ struct ConcurrentReadableHashMap {
       assert(hash_value(key) == hash_value(*element) &&
              "Element must have the same hash code as its key.");
       ElementCount.store(elementCount + 1, std::memory_order_release);
-      indices->storeIndexAt(elementCount + 1, found.second,
-                            std::memory_order_release);
+      indices.storeIndexAt(&Indices, elementCount + 1, found.second,
+                           std::memory_order_release);
     }
 
     deallocateFreeListIfSafe();
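
The assertion above implies a contract between keys and elements: an element must hash identically to the key it was created from, since lookups probe by the key's hash but rehashing probes by the element's. A hypothetical element type satisfying that contract follows; the matchesKey member is an assumption about the map's other requirements, which this diff does not show, and the real code may use a different hash type:

#include <cstddef>
#include <functional>
#include <string>

struct NamedElement {
  std::string Key;
  int Payload;

  NamedElement(const std::string &key) : Key(key), Payload(0) {}

  // Assumed lookup hook: does this element correspond to the given key?
  bool matchesKey(const std::string &key) const { return Key == key; }
};

size_t hash_value(const std::string &key) {
  return std::hash<std::string>()(key);
}

size_t hash_value(const NamedElement &element) {
  return hash_value(element.Key); // same hash as the key, by construction
}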
@@ -1024,16 +1094,17 @@ struct ConcurrentReadableHashMap {
   void clear() {
     ScopedLockTy guard(WriterLock);
 
-    auto *indices = Indices.load(std::memory_order_relaxed);
+    IndexStorage indices = Indices.load(std::memory_order_relaxed);
     auto *elements = Elements.load(std::memory_order_relaxed);
 
     // Order doesn't matter here; snapshots will gracefully handle any field
     // being NULL/0 while the others are not.
-    Indices.store(nullptr, std::memory_order_relaxed);
+    Indices.store(0, std::memory_order_relaxed);
     ElementCount.store(0, std::memory_order_relaxed);
     Elements.store(nullptr, std::memory_order_relaxed);
 
-    FreeListNode::add(&FreeList, indices);
+    if (auto *ptr = indices.pointer())
+      FreeListNode::add(&FreeList, ptr);
     FreeListNode::add(&FreeList, elements);
 
     deallocateFreeListIfSafe();
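
Note that clear() retires the old arrays to the free list rather than freeing them immediately, since concurrent readers may still hold snapshots that reference them. A much-simplified sketch of that deferred-reclamation idea, with hypothetical names (the real FreeList and deallocateFreeListIfSafe implementations are not shown in this diff):

#include <atomic>
#include <cstdlib>
#include <vector>

std::atomic<size_t> ReaderCount{0}; // incremented while a snapshot is live
std::vector<void *> RetiredAllocations;

// Called with the writer lock held: queue a retired allocation.
void retire(void *ptr) {
  if (ptr)
    RetiredAllocations.push_back(ptr);
}

// Free the queued allocations only when no reader holds a snapshot.
void deallocateIfSafe() {
  if (ReaderCount.load(std::memory_order_acquire) == 0) {
    for (void *ptr : RetiredAllocations)
      free(ptr);
    RetiredAllocations.clear();
  }
}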