void *memchr(const void *src, int c, size_t n)
{
#if defined(__wasm_simd128__) && defined(__wasilibc_simd_string)
+// Skip Clang 19 and Clang 20, which have a bug (llvm/llvm-project#146574)
+// that results in an ICE when inline assembly is used with a vector result.
+#if __clang_major__ != 19 && __clang_major__ != 20
    // When n is zero, a function that locates a character finds no occurrence.
    // Otherwise, decrement n to ensure sub_overflow overflows
    // when n would go equal-to-or-below zero.
    if (!n--) {
        return NULL;
    }

-    // memchr must behave as if it reads characters sequentially
-    // and stops as soon as a match is found.
-    // Aligning ensures loads beyond the first match are safe.
-    // Casting through uintptr_t makes this implementation-defined,
-    // rather than undefined behavior.
+    // Note that reading before/after the allocation of a pointer is UB in C,
+    // so inline assembly is used to generate the exact machine instruction
+    // we want, with semantics opaque to the compiler, to avoid the UB.
    uintptr_t align = (uintptr_t)src % sizeof(v128_t);
-    const v128_t *v = (v128_t *)((uintptr_t)src - align);
-    const v128_t vc = wasm_i8x16_splat(c);
+    uintptr_t addr = (uintptr_t)src - align;
+    v128_t vc = wasm_i8x16_splat(c);

    for (;;) {
-        const v128_t cmp = wasm_i8x16_eq(*v, vc);
+        v128_t v;
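+        // Perform the 16-byte aligned load through inline assembly, so the
+        // only thing the compiler sees is an opaque v128 result for this
+        // address.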
+        __asm__(
+            "local.get %1\n"
+            "v128.load 0\n"
+            "local.set %0\n"
+            : "=r"(v)
+            : "r"(addr)
+            : "memory");
+        v128_t cmp = wasm_i8x16_eq(v, vc);
        // Bitmask is slow on AArch64, any_true is much faster.
        if (wasm_v128_any_true(cmp)) {
            // Clear the bits corresponding to align (little-endian)
@@ -48,16 +58,18 @@ void *memchr(const void *src, int c, size_t n)
                // That's a match, unless it is beyond the end of the object.
                // Recall that we decremented n, so less-than-or-equal-to is correct.
                size_t ctz = __builtin_ctz(mask);
-                return ctz - align <= n ? (char *)v + ctz : NULL;
+                return ctz - align <= n ? (char *)src + (addr + ctz - (uintptr_t)src)
+                                        : NULL;
            }
        }
        // Decrement n; if it overflows we're done.
        if (__builtin_sub_overflow(n, sizeof(v128_t) - align, &n)) {
            return NULL;
        }
        align = 0;
-        v++;
+        addr += sizeof(v128_t);
    }
+#endif
#endif

    const unsigned char *s = src;
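For context, here is a minimal standalone sketch of the opaque-load technique
the patch uses, pulled out into a helper. It assumes a wasm32 target built
with -msimd128, and the helper name v128_load_opaque is ours, not part of the
patch; as the patch notes, Clang 19 and 20 ICE on inline assembly with a
vector result, so this only compiles on other Clang versions.

#include <stdint.h>
#include <wasm_simd128.h>

// Load 16 aligned bytes from addr as a single v128.load instruction.
// Because the load happens inside inline assembly, the compiler cannot
// reason about (or miscompile based on) reads that fall outside the
// bounds of the original C object.
static inline v128_t v128_load_opaque(uintptr_t addr)
{
    v128_t v;
    __asm__(
        "local.get %1\n"
        "v128.load 0\n"
        "local.set %0\n"
        : "=r"(v)
        : "r"(addr)
        : "memory");
    return v;
}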