-
Notifications
You must be signed in to change notification settings - Fork 15.2k
Enable ASAN checks for wcscat/wcsncat on Windows, and wcscpy/wcsncpy on all platforms #90909
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
…wcsncpy on all platforms.
@llvm/pr-subscribers-compiler-rt-sanitizer Author: None (branh) ChangesUse of wchar versions of string functions is common on Windows. We (Microsoft) are working to improve ASAN instrumentation of these functions, to bring wchar_t* support up to parity with the char* functions in string.h. We're starting by enabling the wcscat/wcsncat functions on Windows which have already been enabled on POSIX systems, and adding interceptors for wcscpy and wcsncpy. Full diff: https://github.com/llvm/llvm-project/pull/90909.diff 8 Files Affected:
diff --git a/compiler-rt/lib/asan/asan_interceptors.cpp b/compiler-rt/lib/asan/asan_interceptors.cpp
index 6d1360e104975f..bb2b100e089acd 100644
--- a/compiler-rt/lib/asan/asan_interceptors.cpp
+++ b/compiler-rt/lib/asan/asan_interceptors.cpp
@@ -65,6 +65,15 @@ static inline uptr MaybeRealStrnlen(const char *s, uptr maxlen) {
return internal_strnlen(s, maxlen);
}
+static inline uptr MaybeRealWcsnlen(const wchar_t *s, uptr maxlen) {
+# if SANITIZER_INTERCEPT_STRNLEN
+ if (REAL(wcsnlen)) {
+ return REAL(wcsnlen)(s, maxlen);
+ }
+# endif
+ return internal_wcsnlen(s, maxlen);
+}
+
void SetThreadName(const char *name) {
AsanThread *t = GetCurrentThread();
if (t)
@@ -570,6 +579,21 @@ INTERCEPTOR(char *, strcpy, char *to, const char *from) {
return REAL(strcpy)(to, from);
}
+INTERCEPTOR(wchar_t *, wcscpy, wchar_t *to, const wchar_t *from) {
+ void *ctx;
+ ASAN_INTERCEPTOR_ENTER(ctx, wcscpy);
+ if (!TryAsanInitFromRtl())
+ return REAL(wcscpy)(to, from);
+
+ if (flags()->replace_str) {
+ uptr from_size = (internal_wcslen(from) + 1) * sizeof(wchar_t);
+ CHECK_RANGES_OVERLAP("wcscpy", to, from_size, from, from_size);
+ ASAN_READ_RANGE(ctx, from, from_size);
+ ASAN_WRITE_RANGE(ctx, to, from_size);
+ }
+ return REAL(wcscpy)(to, from);
+}
+
// Windows doesn't always define the strdup identifier,
// and when it does it's a macro defined to either _strdup
// or _strdup_dbg, _strdup_dbg ends up calling _strdup, so
@@ -630,6 +654,20 @@ INTERCEPTOR(char*, strncpy, char *to, const char *from, uptr size) {
return REAL(strncpy)(to, from, size);
}
+INTERCEPTOR(wchar_t *, wcsncpy, wchar_t *to, const wchar_t *from, uptr size) {
+ void *ctx;
+ ASAN_INTERCEPTOR_ENTER(ctx, strncpy);
+ AsanInitFromRtl();
+ if (flags()->replace_str) {
+ uptr from_size =
+ Min(size, MaybeRealWcsnlen(from, size) + 1) * sizeof(wchar_t);
+ CHECK_RANGES_OVERLAP("wcsncpy", to, from_size, from, from_size);
+ ASAN_READ_RANGE(ctx, from, from_size);
+ ASAN_WRITE_RANGE(ctx, to, size);
+ }
+ return REAL(wcsncpy)(to, from, size);
+}
+
template <typename Fn>
static ALWAYS_INLINE auto StrtolImpl(void *ctx, Fn real, const char *nptr,
char **endptr, int base)
@@ -781,6 +819,11 @@ void InitializeAsanInterceptors() {
ASAN_INTERCEPT_FUNC(strncat);
ASAN_INTERCEPT_FUNC(strncpy);
ASAN_INTERCEPT_FUNC(strdup);
+
+ // Intercept wcs* functions.
+ ASAN_INTERCEPT_FUNC(wcscpy);
+ ASAN_INTERCEPT_FUNC(wcsncpy);
+
# if ASAN_INTERCEPT___STRDUP
ASAN_INTERCEPT_FUNC(__strdup);
#endif
diff --git a/compiler-rt/lib/asan/asan_interceptors.h b/compiler-rt/lib/asan/asan_interceptors.h
index 826b45f5ada8c0..f38fe100a611a1 100644
--- a/compiler-rt/lib/asan/asan_interceptors.h
+++ b/compiler-rt/lib/asan/asan_interceptors.h
@@ -129,6 +129,7 @@ DECLARE_REAL(char*, strchr, const char *str, int c)
DECLARE_REAL(SIZE_T, strlen, const char *s)
DECLARE_REAL(char*, strncpy, char *to, const char *from, uptr size)
DECLARE_REAL(uptr, strnlen, const char *s, uptr maxlen)
+DECLARE_REAL(uptr, wcsnlen, const wchar_t *s, uptr maxlen)
DECLARE_REAL(char*, strstr, const char *s1, const char *s2)
# if !SANITIZER_APPLE
diff --git a/compiler-rt/lib/asan/asan_win_dll_thunk.cpp b/compiler-rt/lib/asan/asan_win_dll_thunk.cpp
index 35871a942a7a12..f1fb4b07400d52 100644
--- a/compiler-rt/lib/asan/asan_win_dll_thunk.cpp
+++ b/compiler-rt/lib/asan/asan_win_dll_thunk.cpp
@@ -93,6 +93,10 @@ INTERCEPT_LIBRARY_FUNCTION(strstr);
INTERCEPT_LIBRARY_FUNCTION(strtok);
INTERCEPT_LIBRARY_FUNCTION(strtol);
INTERCEPT_LIBRARY_FUNCTION(strtoll);
+INTERCEPT_LIBRARY_FUNCTION(wcscat);
+INTERCEPT_LIBRARY_FUNCTION(wcscpy);
+INTERCEPT_LIBRARY_FUNCTION(wcsncat);
+INTERCEPT_LIBRARY_FUNCTION(wcsncpy);
INTERCEPT_LIBRARY_FUNCTION(wcslen);
INTERCEPT_LIBRARY_FUNCTION(wcsnlen);
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h
index de55c736d0e144..417355f5f16ea7 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h
@@ -498,7 +498,7 @@
#define SANITIZER_INTERCEPT_MALLOC_USABLE_SIZE (!SI_MAC && !SI_NETBSD)
#define SANITIZER_INTERCEPT_MCHECK_MPROBE SI_LINUX_NOT_ANDROID
#define SANITIZER_INTERCEPT_WCSLEN 1
-#define SANITIZER_INTERCEPT_WCSCAT SI_POSIX
+#define SANITIZER_INTERCEPT_WCSCAT 1
#define SANITIZER_INTERCEPT_WCSDUP SI_POSIX
#define SANITIZER_INTERCEPT_SIGNAL_AND_SIGACTION (!SI_WINDOWS && SI_NOT_FUCHSIA)
#define SANITIZER_INTERCEPT_BSD_SIGNAL SI_ANDROID
diff --git a/compiler-rt/test/asan/TestCases/wcscat.cpp b/compiler-rt/test/asan/TestCases/wcscat.cpp
new file mode 100644
index 00000000000000..c870e4c1e95b46
--- /dev/null
+++ b/compiler-rt/test/asan/TestCases/wcscat.cpp
@@ -0,0 +1,51 @@
+// RUN: %clang_cl_asan -Od -Zi %s -Fe%t
+// RUN: not %run %t 2>&1 | FileCheck %s
+// RUN: %clang_cl_asan -O2 -Zi %s -Fe%t
+// RUN: not %run %t 2>&1 | FileCheck %s
+
+// RUN: %clangxx_asan -O0 %s -o %t && not %run %t 2>&1 | FileCheck %s
+
+#include <stdio.h>
+#include <wchar.h>
+
+int main() {
+ wchar_t *start = L"X means ";
+ wchar_t *append = L"dog";
+ wchar_t goodDst[12];
+ wcscpy(goodDst, start);
+ wcscat(goodDst, append);
+
+ wchar_t badDst[9];
+ wcscpy(badDst, start);
+ printf("Good so far.\n");
+ // CHECK: Good so far.
+ wcscat(badDst, append); // Boom!
+ // CHECK:ERROR: AddressSanitizer: stack-buffer-overflow on address [[ADDR:0x[0-9a-f]+]] at pc {{0x[0-9a-f]+}} bp {{0x[0-9a-f]+}} sp {{0x[0-9a-f]+}}
+ // CHECK: WRITE of size {{[0-9]+}} at [[ADDR:0x[0-9a-f]+]] thread T0
+ // CHECK: #0 [[ADDR:0x[0-9a-f]+]] in wcscat {{.*}}\sanitizer_common_interceptors.inc:{{[0-9]+}}
+ // CHECK: #1 [[ADDR:0x[0-9a-f]+]] in main {{.*}}\TestCases\wcscat.cpp:22
+ // CHECK: This frame has 2 object(s):
+ // CHECK: HINT: this may be a false positive if your program uses some custom stack unwind mechanism, swapcontext or vfork
+ // CHECK: (longjmp, SEH and C++ exceptions *are* supported)
+ // CHECK: SUMMARY: AddressSanitizer: stack-buffer-overflow {{.*}} in main
+ // CHECK: Shadow bytes around the buggy address:
+ // CHECK: Shadow byte legend (one shadow byte represents 8 application bytes):
+ // CHECK-NEXT: Addressable: 00
+ // CHECK-NEXT: Partially addressable: 01 02 03 04 05 06 07
+ // CHECK-NEXT: Heap left redzone: fa
+ // CHECK-NEXT: Freed heap region: fd
+ // CHECK-NEXT: Stack left redzone: f1
+ // CHECK-NEXT: Stack mid redzone: f2
+ // CHECK-NEXT: Stack right redzone: f3
+ // CHECK-NEXT: Stack after return: f5
+ // CHECK-NEXT: Stack use after scope: f8
+ // CHECK-NEXT: Global redzone: f9
+ // CHECK-NEXT: Global init order: f6
+ // CHECK-NEXT: Poisoned by user: f7
+ // CHECK-NEXT: Container overflow: fc
+ // CHECK-NEXT: Array cookie: ac
+ // CHECK-NEXT: Intra object redzone: bb
+ // CHECK-NEXT: ASan internal: fe
+ // CHECK-NEXT: Left alloca redzone: ca
+ // CHECK-NEXT: Right alloca redzone: cb
+}
\ No newline at end of file
diff --git a/compiler-rt/test/asan/TestCases/wcscpy.cpp b/compiler-rt/test/asan/TestCases/wcscpy.cpp
new file mode 100644
index 00000000000000..9ae4d8b9418b93
--- /dev/null
+++ b/compiler-rt/test/asan/TestCases/wcscpy.cpp
@@ -0,0 +1,50 @@
+// RUN: %clang_cl_asan -Od -Zi %s -Fe%t
+// RUN: not %run %t 2>&1 | FileCheck %s
+// RUN: %clang_cl_asan -O2 -Zi %s -Fe%t
+// RUN: not %run %t 2>&1 | FileCheck %s
+
+// RUN: %clangxx_asan -O0 %s -o %t && not %run %t 2>&1 | FileCheck %s
+
+#include <stdio.h>
+#include <wchar.h>
+
+int main() {
+ wchar_t *src = L"X means dog";
+ wchar_t goodDst[12];
+ wcscpy(goodDst, src);
+
+ wchar_t badDst[7];
+ printf("Good so far.\n");
+ // CHECK: Good so far.
+
+ wcscpy(badDst, src); // Boom!
+ // CHECK:ERROR: AddressSanitizer: stack-buffer-overflow on address [[ADDR:0x[0-9a-f]+]] at pc {{0x[0-9a-f]+}} bp {{0x[0-9a-f]+}} sp {{0x[0-9a-f]+}}
+ // CHECK: WRITE of size {{[0-9]+}} at [[ADDR:0x[0-9a-f]+]] thread T0
+ // CHECK: #0 [[ADDR:0x[0-9a-f]+]] in wcscpy {{.*}}\asan_interceptors.cpp:{{[0-9]+}}
+ // CHECK: #1 [[ADDR:0x[0-9a-f]+]] in main {{.*}}\TestCases\wcscpy.cpp:20
+ // CHECK: This frame has 2 object(s):
+ // CHECK: HINT: this may be a false positive if your program uses some custom stack unwind mechanism, swapcontext or vfork
+ // CHECK: (longjmp, SEH and C++ exceptions *are* supported)
+ // CHECK: SUMMARY: AddressSanitizer: stack-buffer-overflow {{.*}} in main
+ // CHECK: Shadow bytes around the buggy address:
+ // CHECK: Shadow byte legend (one shadow byte represents 8 application bytes):
+ // CHECK-NEXT: Addressable: 00
+ // CHECK-NEXT: Partially addressable: 01 02 03 04 05 06 07
+ // CHECK-NEXT: Heap left redzone: fa
+ // CHECK-NEXT: Freed heap region: fd
+ // CHECK-NEXT: Stack left redzone: f1
+ // CHECK-NEXT: Stack mid redzone: f2
+ // CHECK-NEXT: Stack right redzone: f3
+ // CHECK-NEXT: Stack after return: f5
+ // CHECK-NEXT: Stack use after scope: f8
+ // CHECK-NEXT: Global redzone: f9
+ // CHECK-NEXT: Global init order: f6
+ // CHECK-NEXT: Poisoned by user: f7
+ // CHECK-NEXT: Container overflow: fc
+ // CHECK-NEXT: Array cookie: ac
+ // CHECK-NEXT: Intra object redzone: bb
+ // CHECK-NEXT: ASan internal: fe
+ // CHECK-NEXT: Left alloca redzone: ca
+ // CHECK-NEXT: Right alloca redzone: cb
+ printf("Should have failed with ASAN error.\n");
+}
\ No newline at end of file
diff --git a/compiler-rt/test/asan/TestCases/wcsncat.cpp b/compiler-rt/test/asan/TestCases/wcsncat.cpp
new file mode 100644
index 00000000000000..81c6872b7ec489
--- /dev/null
+++ b/compiler-rt/test/asan/TestCases/wcsncat.cpp
@@ -0,0 +1,52 @@
+// RUN: %clang_cl_asan -Od -Zi %s -Fe%t
+// RUN: not %run %t 2>&1 | FileCheck %s
+// RUN: %clang_cl_asan -O2 -Zi %s -Fe%t
+// RUN: not %run %t 2>&1 | FileCheck %s
+
+// RUN: %clangxx_asan -O0 %s -o %t && not %run %t 2>&1 | FileCheck %s
+
+#include <stdio.h>
+#include <wchar.h>
+
+int main() {
+ wchar_t *start = L"X means ";
+ wchar_t *append = L"dog";
+ wchar_t goodDst[15];
+ wcscpy(goodDst, start);
+ wcsncat(goodDst, append, 5);
+
+ wchar_t badDst[11];
+ wcscpy(badDst, start);
+ wcsncat(badDst, append, 1);
+ printf("Good so far.\n");
+ // CHECK: Good so far.
+ wcsncat(badDst, append, 3); // Boom!
+ // CHECK:ERROR: AddressSanitizer: stack-buffer-overflow on address [[ADDR:0x[0-9a-f]+]] at pc {{0x[0-9a-f]+}} bp {{0x[0-9a-f]+}} sp {{0x[0-9a-f]+}}
+ // CHECK: WRITE of size {{[0-9]+}} at [[ADDR:0x[0-9a-f]+]] thread T0
+ // CHECK: #0 [[ADDR:0x[0-9a-f]+]] in wcsncat {{.*}}\sanitizer_common_interceptors.inc:{{[0-9]+}}
+ // CHECK: #1 [[ADDR:0x[0-9a-f]+]] in main {{.*}}\TestCases\wcsncat.cpp:23
+ // CHECK: This frame has 2 object(s):
+ // CHECK: HINT: this may be a false positive if your program uses some custom stack unwind mechanism, swapcontext or vfork
+ // CHECK: (longjmp, SEH and C++ exceptions *are* supported)
+ // CHECK: SUMMARY: AddressSanitizer: stack-buffer-overflow {{.*}} in main
+ // CHECK: Shadow bytes around the buggy address:
+ // CHECK: Shadow byte legend (one shadow byte represents 8 application bytes):
+ // CHECK-NEXT: Addressable: 00
+ // CHECK-NEXT: Partially addressable: 01 02 03 04 05 06 07
+ // CHECK-NEXT: Heap left redzone: fa
+ // CHECK-NEXT: Freed heap region: fd
+ // CHECK-NEXT: Stack left redzone: f1
+ // CHECK-NEXT: Stack mid redzone: f2
+ // CHECK-NEXT: Stack right redzone: f3
+ // CHECK-NEXT: Stack after return: f5
+ // CHECK-NEXT: Stack use after scope: f8
+ // CHECK-NEXT: Global redzone: f9
+ // CHECK-NEXT: Global init order: f6
+ // CHECK-NEXT: Poisoned by user: f7
+ // CHECK-NEXT: Container overflow: fc
+ // CHECK-NEXT: Array cookie: ac
+ // CHECK-NEXT: Intra object redzone: bb
+ // CHECK-NEXT: ASan internal: fe
+ // CHECK-NEXT: Left alloca redzone: ca
+ // CHECK-NEXT: Right alloca redzone: cb
+}
\ No newline at end of file
diff --git a/compiler-rt/test/asan/TestCases/wcsncpy.cpp b/compiler-rt/test/asan/TestCases/wcsncpy.cpp
new file mode 100644
index 00000000000000..0d0c9a142e418d
--- /dev/null
+++ b/compiler-rt/test/asan/TestCases/wcsncpy.cpp
@@ -0,0 +1,51 @@
+// RUN: %clang_cl_asan -Od -Zi %s -Fe%t
+// RUN: not %run %t 2>&1 | FileCheck %s
+// RUN: %clang_cl_asan -O2 -Zi %s -Fe%t
+// RUN: not %run %t 2>&1 | FileCheck %s
+
+// RUN: %clangxx_asan -O0 %s -o %t && not %run %t 2>&1 | FileCheck %s
+
+#include <stdio.h>
+#include <wchar.h>
+
+int main() {
+ wchar_t *src = L"X means dog";
+ wchar_t goodDst[12];
+ wcsncpy(goodDst, src, 12);
+
+ wchar_t badDst[7];
+ wcsncpy(badDst, src, 7); // This should still work.
+ printf("Good so far.\n");
+ // CHECK: Good so far.
+
+ wcsncpy(badDst, src, 15); // Boom!
+ // CHECK:ERROR: AddressSanitizer: stack-buffer-overflow on address [[ADDR:0x[0-9a-f]+]] at pc {{0x[0-9a-f]+}} bp {{0x[0-9a-f]+}} sp {{0x[0-9a-f]+}}
+ // CHECK: WRITE of size {{[0-9]+}} at [[ADDR:0x[0-9a-f]+]] thread T0
+ // CHECK: #0 [[ADDR:0x[0-9a-f]+]] in wcsncpy {{.*}}\asan_interceptors.cpp:{{[0-9]+}}
+ // CHECK: #1 [[ADDR:0x[0-9a-f]+]] in main {{.*}}\TestCases\wcsncpy.cpp:21
+ // CHECK: This frame has 2 object(s):
+ // CHECK: HINT: this may be a false positive if your program uses some custom stack unwind mechanism, swapcontext or vfork
+ // CHECK: (longjmp, SEH and C++ exceptions *are* supported)
+ // CHECK: SUMMARY: AddressSanitizer: stack-buffer-overflow {{.*}} in main
+ // CHECK: Shadow bytes around the buggy address:
+ // CHECK: Shadow byte legend (one shadow byte represents 8 application bytes):
+ // CHECK-NEXT: Addressable: 00
+ // CHECK-NEXT: Partially addressable: 01 02 03 04 05 06 07
+ // CHECK-NEXT: Heap left redzone: fa
+ // CHECK-NEXT: Freed heap region: fd
+ // CHECK-NEXT: Stack left redzone: f1
+ // CHECK-NEXT: Stack mid redzone: f2
+ // CHECK-NEXT: Stack right redzone: f3
+ // CHECK-NEXT: Stack after return: f5
+ // CHECK-NEXT: Stack use after scope: f8
+ // CHECK-NEXT: Global redzone: f9
+ // CHECK-NEXT: Global init order: f6
+ // CHECK-NEXT: Poisoned by user: f7
+ // CHECK-NEXT: Container overflow: fc
+ // CHECK-NEXT: Array cookie: ac
+ // CHECK-NEXT: Intra object redzone: bb
+ // CHECK-NEXT: ASan internal: fe
+ // CHECK-NEXT: Left alloca redzone: ca
+ // CHECK-NEXT: Right alloca redzone: cb
+ printf("Should have failed with ASAN error.\n");
+}
\ No newline at end of file
|
@@ -0,0 +1,51 @@ | |||
// RUN: %clang_cl_asan -Od -Zi %s -Fe%t |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think the reason the linux tests are failing is clang_asan
vs clang_cl_asan
? not sure though
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, we shouldn't have any %clang_cl_asan in the top level testscases directory
Switch this to %clangxx_asan, that should work fine everywhere (but on mingw it runs clang++
)
I think this should be %clangxx_asan -O0 %s -o %t
, but you should probably copy one the other examples of a %clangxx_asan
line in the TestCases folder.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note that when we run our tests with msvc we substitute out common GNU-style command line options for the closest MSVC style ones, which is why it's OK to put stuff like -o %t
here.
the %
isn't really a magic character, it's just convention that substitutions target strings that begin with %.
} | ||
|
||
static inline uptr MaybeRealWcsnlen(const wchar_t *s, uptr maxlen) { | ||
# if SANITIZER_INTERCEPT_STRNLEN |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this supposed to be SANITIZER_INTERCEPT_WCSLEN
? Or should there be a wide string equivalent?
@@ -0,0 +1,51 @@ | |||
// RUN: %clang_cl_asan -Od -Zi %s -Fe%t | |||
// RUN: not %run %t 2>&1 | FileCheck %s |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These can be on the same line, that's the convention
@@ -0,0 +1,51 @@ | |||
// RUN: %clang_cl_asan -Od -Zi %s -Fe%t |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This appears to be basically the same as the below // RUN: %clangxx_asan -O0 %s -o %t && not %run %t 2>&1 | FileCheck %s
, I don't think we need it (debug information is the default)
@@ -0,0 +1,52 @@ | |||
// RUN: %clang_cl_asan -Od -Zi %s -Fe%t |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ditto duplicate
@@ -0,0 +1,50 @@ | |||
// RUN: %clang_cl_asan -Od -Zi %s -Fe%t |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ditto duplicate
|
||
static inline uptr MaybeRealWcsnlen(const wchar_t *s, uptr maxlen) { | ||
# if SANITIZER_INTERCEPT_STRNLEN | ||
if (REAL(wcsnlen)) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this code does not {}
in cases like this
Hi all, We have worked on AddressSanitizer in the context of a research paper. During that work, we observed that ASan may miss some Juliet Test Suite cases involving wide-character functions such as wcscpy(). In fact, we previously implemented a prototype fix for this issue on LLVM 4, and we would like to help continue this effort upstream. This PR (#90909) looks very relevant and already contains useful work. Since it has been inactive for some time, would it be preferable for us to:
For context, some of our findings are discussed in our recent paper: We’d be happy to continue and address the test issues/review comments that were mentioned. Thanks! |
Nice work! Congrats on your publication. For future reference, you might be interested in comparing your work to "Light-weight Bounds Checking" (CGO 2012; https://www.seclab.cs.sunysb.edu/seclab/pubs/lbc.pdf), which pioneered the general two-stage "check if the value is possibly a red zone value, if so do a slow-path check" approach. |
Thanks a lot for the kind words and for the pointer to the Light-weight Bounds Checking paper, which is definitely an interesting and highly related paper. We’re happy to move this PR work forward, i.e., support wide-character functions such as |
…t/wcsncat on Windows Summary: - Add ASan interceptors for wcscpy/wcsncpy and register them. - Enable wcscat/wcsncat on Windows via platform interceptor macro. - Add MaybeRealWcsnlen guarded by SANITIZER_INTERCEPT_WCSLEN. - Update Windows static thunk to forward wchar functions. - Add tests for wcscpy/wcsncpy/wcscat/wcsncat with standard RUN lines. Prior work and attribution: - Based on and extends PR llvm#90909 (author: branh, Microsoft). Thanks for the prior work and review feedback. llvm#90909 Testing: - AArch64 Linux: 4 new tests pass; full check-asan passed (0 failures) after enabling profile runtime. Context: - Related research background: Tech-ASan (Two-stage check for AddressSanitizer): https://arxiv.org/abs/2506.05022
- Implement wchar interceptors; register Windows thunk. - wcsncpy: compute write size in bytes (size * sizeof(wchar_t)) to avoid missed overflows when sizeof(wchar_t) != 1. - Harden tests (fflush, resilient FileCheck). Follow-up to PR llvm#90909: builds on that work and addresses review feedback. Refs: llvm#90909
…ows (#160493) Summary - Add ASan interceptors for wcscpy/wcsncpy on all platforms. - Enable wcscat/wcsncat on Windows (already enabled on POSIX via sanitizer_common). Motivation - Use of wchar string APIs is common on Windows; improve parity with char* string checks. Changes - Implement wcscpy/wcsncpy in asan_interceptors.cpp; check overlap and mark read/write ranges in bytes. - wcsncpy: compute write size in bytes (size * sizeof(wchar_t)) to avoid missed overflows when sizeof(wchar_t) != 1. - Use MaybeRealWcsnlen when available to bound reads. - Register Windows static thunk for wcscpy/wcsncpy/wcscat/wcsncat; rely on sanitizer_common interceptors for wcscat/wcsncat. - Tests: add wcscpy/wcsncpy/wcscat/wcsncat; flush stdout before crash; use resilient FileCheck patterns (reuse [[ADDR]], wildcard for function suffixes and paths, flexible line numbers). Testing - AArch64 Linux: new tests pass with check-asan locally. Follow-up to and based on prior work in PR #90909 (author: branh, Microsoft); builds on that work and addresses review feedback. Thanks! --------- Signed-off-by: Yixuan Cao <[email protected]>
…ows (llvm#160493) Summary - Add ASan interceptors for wcscpy/wcsncpy on all platforms. - Enable wcscat/wcsncat on Windows (already enabled on POSIX via sanitizer_common). Motivation - Use of wchar string APIs is common on Windows; improve parity with char* string checks. Changes - Implement wcscpy/wcsncpy in asan_interceptors.cpp; check overlap and mark read/write ranges in bytes. - wcsncpy: compute write size in bytes (size * sizeof(wchar_t)) to avoid missed overflows when sizeof(wchar_t) != 1. - Use MaybeRealWcsnlen when available to bound reads. - Register Windows static thunk for wcscpy/wcsncpy/wcscat/wcsncat; rely on sanitizer_common interceptors for wcscat/wcsncat. - Tests: add wcscpy/wcsncpy/wcscat/wcsncat; flush stdout before crash; use resilient FileCheck patterns (reuse [[ADDR]], wildcard for function suffixes and paths, flexible line numbers). Testing - AArch64 Linux: new tests pass with check-asan locally. Follow-up to and based on prior work in PR llvm#90909 (author: branh, Microsoft); builds on that work and addresses review feedback. Thanks! --------- Signed-off-by: Yixuan Cao <[email protected]>
Use of wchar versions of string functions is common on Windows.
We (Microsoft) are working to improve ASAN instrumentation of these functions, to bring wchar_t* support up to parity with the char* functions in string.h.
We're starting by enabling the wcscat/wcsncat functions on Windows which have already been enabled on POSIX systems, and adding interceptors for wcscpy and wcsncpy.