|
17 | 17 | namespace LIBC_NAMESPACE_DECL { |
18 | 18 | namespace internal { |
19 | 19 |
|
20 | | -using Compare = int(const void *, const void *); |
21 | | -using CompareWithState = int(const void *, const void *, void *); |
22 | | - |
23 | | -enum class CompType { COMPARE, COMPARE_WITH_STATE }; |
24 | | - |
25 | | -struct Comparator { |
26 | | - union { |
27 | | - Compare *comp_func; |
28 | | - CompareWithState *comp_func_r; |
29 | | - }; |
30 | | - const CompType comp_type; |
31 | | - |
32 | | - void *arg; |
33 | | - |
34 | | - Comparator(Compare *func) |
35 | | - : comp_func(func), comp_type(CompType::COMPARE), arg(nullptr) {} |
36 | | - |
37 | | - Comparator(CompareWithState *func, void *arg_val) |
38 | | - : comp_func_r(func), comp_type(CompType::COMPARE_WITH_STATE), |
39 | | - arg(arg_val) {} |
40 | | - |
41 | | -#if defined(__clang__) |
42 | | - // Recent upstream changes to -fsanitize=function find more instances of |
43 | | - // function type mismatches. One case is with the comparator passed to this |
44 | | - // class. Libraries will tend to pass comparators that take pointers to |
45 | | - // varying types while this comparator expects to accept const void pointers. |
46 | | - // Ideally those tools would pass a function that strictly accepts const |
47 | | - // void*s to avoid UB, or would use qsort_r to pass their own comparator. |
48 | | - [[clang::no_sanitize("function")]] |
49 | | -#endif |
50 | | - int comp_vals(const void *a, const void *b) const { |
51 | | - if (comp_type == CompType::COMPARE) { |
52 | | - return comp_func(a, b); |
53 | | - } else { |
54 | | - return comp_func_r(a, b, arg); |
| 20 | +class ArrayGenericSize { |
| 21 | + cpp::byte *array_base; |
| 22 | + size_t array_len; |
| 23 | + size_t elem_size; |
| 24 | + |
| 25 | + LIBC_INLINE cpp::byte *get_internal(size_t i) const { |
| 26 | + return array_base + (i * elem_size); |
| 27 | + } |
| 28 | + |
| 29 | +public: |
| 30 | + LIBC_INLINE ArrayGenericSize(void *a, size_t s, size_t e) |
| 31 | + : array_base(reinterpret_cast<cpp::byte *>(a)), array_len(s), |
| 32 | + elem_size(e) {} |
| 33 | + |
| 34 | + static constexpr bool has_fixed_size() { return false; } |
| 35 | + |
| 36 | + LIBC_INLINE void *get(size_t i) const { return get_internal(i); } |
| 37 | + |
| 38 | + LIBC_INLINE void swap(size_t i, size_t j) const { |
| 39 | + // It's possible to use 8 byte blocks with `uint64_t`, but that |
| 40 | + // generates more machine code as the remainder loop gets |
| 41 | + // unrolled, plus 4 byte operations are more likely to be |
| 42 | + // efficient on a wider variety of hardware. On x86 LLVM tends |
| 43 | + // to unroll the block loop again into two 16 byte swaps per |
| 44 | + // iteration which is another reason that 4 byte blocks yield |
| 45 | + // good performance even for big types. |
| 46 | + using block_t = uint32_t; |
| 47 | + constexpr size_t BLOCK_SIZE = sizeof(block_t); |
| 48 | + |
| 49 | + alignas(block_t) cpp::byte tmp_block[BLOCK_SIZE]; |
| 50 | + |
| 51 | + cpp::byte *elem_i = get_internal(i); |
| 52 | + cpp::byte *elem_j = get_internal(j); |
| 53 | + |
| 54 | + const size_t elem_size_rem = elem_size % BLOCK_SIZE; |
| 55 | + const cpp::byte *elem_i_block_end = elem_i + (elem_size - elem_size_rem); |
| 56 | + |
| 57 | + while (elem_i != elem_i_block_end) { |
| 58 | + __builtin_memcpy(tmp_block, elem_i, BLOCK_SIZE); |
| 59 | + __builtin_memcpy(elem_i, elem_j, BLOCK_SIZE); |
| 60 | + __builtin_memcpy(elem_j, tmp_block, BLOCK_SIZE); |
| 61 | + |
| 62 | + elem_i += BLOCK_SIZE; |
| 63 | + elem_j += BLOCK_SIZE; |
| 64 | + } |
| 65 | + |
| 66 | + for (size_t n = 0; n < elem_size_rem; ++n) { |
| 67 | + cpp::byte tmp = elem_i[n]; |
| 68 | + elem_i[n] = elem_j[n]; |
| 69 | + elem_j[n] = tmp; |
55 | 70 | } |
56 | 71 | } |
| 72 | + |
| 73 | + LIBC_INLINE size_t len() const { return array_len; } |
| 74 | + |
| 75 | + // Make an Array starting at index |i| and length |s|. |
| 76 | + LIBC_INLINE ArrayGenericSize make_array(size_t i, size_t s) const { |
| 77 | + return ArrayGenericSize(get_internal(i), s, elem_size); |
| 78 | + } |
| 79 | + |
| 80 | + // Reset this Array to point at a different interval of the same |
| 81 | + // items starting at index |i|. |
| 82 | + LIBC_INLINE void reset_bounds(size_t i, size_t s) { |
| 83 | + array_base = get_internal(i); |
| 84 | + array_len = s; |
| 85 | + } |
57 | 86 | }; |
58 | 87 |
|
59 | | -class Array { |
60 | | - uint8_t *array; |
61 | | - size_t array_size; |
62 | | - size_t elem_size; |
63 | | - Comparator compare; |
| 88 | +// Having a specialized Array type for sorting that knows at |
| 89 | +// compile-time what the size of the element is, allows for much more |
| 90 | +// efficient swapping and for cheaper offset calculations. |
| 91 | +template <size_t ELEM_SIZE> class ArrayFixedSize { |
| 92 | + cpp::byte *array_base; |
| 93 | + size_t array_len; |
64 | 94 |
|
65 | | -public: |
66 | | - Array(uint8_t *a, size_t s, size_t e, Comparator c) |
67 | | - : array(a), array_size(s), elem_size(e), compare(c) {} |
68 | | - |
69 | | - uint8_t *get(size_t i) const { return array + i * elem_size; } |
70 | | - |
71 | | - void swap(size_t i, size_t j) const { |
72 | | - uint8_t *elem_i = get(i); |
73 | | - uint8_t *elem_j = get(j); |
74 | | - for (size_t b = 0; b < elem_size; ++b) { |
75 | | - uint8_t temp = elem_i[b]; |
76 | | - elem_i[b] = elem_j[b]; |
77 | | - elem_j[b] = temp; |
78 | | - } |
| 95 | + LIBC_INLINE cpp::byte *get_internal(size_t i) const { |
| 96 | + return array_base + (i * ELEM_SIZE); |
79 | 97 | } |
80 | 98 |
|
81 | | - int elem_compare(size_t i, const uint8_t *other) const { |
82 | | - // An element must compare equal to itself so we don't need to consult the |
83 | | - // user provided comparator. |
84 | | - if (get(i) == other) |
85 | | - return 0; |
86 | | - return compare.comp_vals(get(i), other); |
| 99 | +public: |
| 100 | + LIBC_INLINE ArrayFixedSize(void *a, size_t s) |
| 101 | + : array_base(reinterpret_cast<cpp::byte *>(a)), array_len(s) {} |
| 102 | + |
| 103 | + // Beware this function is used as a heuristic for cheap-to-swap types, so |
| 104 | + // instantiating `ArrayFixedSize` with `ELEM_SIZE > 100` is probably a bad |
| 105 | + // idea performance-wise. |
| 106 | + static constexpr bool has_fixed_size() { return true; } |
| 107 | + |
| 108 | + LIBC_INLINE void *get(size_t i) const { return get_internal(i); } |
| 109 | + |
| 110 | + LIBC_INLINE void swap(size_t i, size_t j) const { |
| 111 | + alignas(32) cpp::byte tmp[ELEM_SIZE]; |
| 112 | + |
| 113 | + cpp::byte *elem_i = get_internal(i); |
| 114 | + cpp::byte *elem_j = get_internal(j); |
| 115 | + |
| 116 | + __builtin_memcpy(tmp, elem_i, ELEM_SIZE); |
| 117 | + __builtin_memmove(elem_i, elem_j, ELEM_SIZE); |
| 118 | + __builtin_memcpy(elem_j, tmp, ELEM_SIZE); |
87 | 119 | } |
88 | 120 |
|
89 | | - size_t size() const { return array_size; } |
| 121 | + LIBC_INLINE size_t len() const { return array_len; } |
90 | 122 |
|
91 | | - // Make an Array starting at index |i| and size |s|. |
92 | | - LIBC_INLINE Array make_array(size_t i, size_t s) const { |
93 | | - return Array(get(i), s, elem_size, compare); |
| 123 | + // Make an Array starting at index |i| and length |s|. |
| 124 | + LIBC_INLINE ArrayFixedSize<ELEM_SIZE> make_array(size_t i, size_t s) const { |
| 125 | + return ArrayFixedSize<ELEM_SIZE>(get_internal(i), s); |
94 | 126 | } |
95 | 127 |
|
96 | | - // Reset this Array to point at a different interval of the same items. |
97 | | - LIBC_INLINE void reset_bounds(uint8_t *a, size_t s) { |
98 | | - array = a; |
99 | | - array_size = s; |
| 128 | + // Reset this Array to point at a different interval of the same |
| 129 | + // items starting at index |i|. |
| 130 | + LIBC_INLINE void reset_bounds(size_t i, size_t s) { |
| 131 | + array_base = get_internal(i); |
| 132 | + array_len = s; |
100 | 133 | } |
101 | 134 | }; |
102 | 135 |
|
103 | | -using SortingRoutine = void(const Array &); |
104 | | - |
105 | 136 | } // namespace internal |
106 | 137 | } // namespace LIBC_NAMESPACE_DECL |
107 | 138 |
|
|
0 commit comments