From c082ede5a159a925fcbbec07bd2bb25e05375536 Mon Sep 17 00:00:00 2001 From: Andrew-William-Smith Date: Tue, 24 Sep 2024 00:21:13 +0100 Subject: [PATCH 1/2] Fix unterminated string in BadPattern exception Previously, we would overallocate the string passed back to the OCaml runtime containing the PCRE2 error message; now, we copy the raw error to an intermediate buffer on the stack before allocating an OCaml string of exactly the proper length. This change both fixes the absence of the string terminator and consumes less memory overall. Additionally, I've increased the size of the error message buffer to 256; while 128 is sufficient for the current set of errors supported by PCRE2, the maximum length of any of which is 92 including the NUL terminator, 256 gives us some breathing room should longer messages begin to be used. This length is also a form of unofficial standard for PCRE error message buffer sizes, being used in the Julia runtime and other high-profile open-source applications. Since the OCaml string we allocate will be exactly the length of the actual error message, the impact this additional stack allocation has is negligible. --- src/pcre2_stubs.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/pcre2_stubs.c b/src/pcre2_stubs.c index 7cb0e06..47919fe 100644 --- a/src/pcre2_stubs.c +++ b/src/pcre2_stubs.c @@ -248,8 +248,13 @@ static inline void raise_bad_pattern(int code, size_t pos) CAMLparam0(); CAMLlocal1(v_msg); value v_arg; - v_msg = caml_alloc_string(128); - pcre2_get_error_message(code, (PCRE2_UCHAR *)String_val(v_msg), 128); + PCRE2_UCHAR8 err[256]; + + /* We can safely assume that PCRE2's UTF-8-compatible human-intelligible error + messages do not contain NUL. */ + pcre2_get_error_message_8(code, err, sizeof(err) / sizeof(err[0])); + v_msg = caml_copy_string((char *)err); + v_arg = caml_alloc_small(2, 0); Field(v_arg, 0) = v_msg; Field(v_arg, 1) = Val_int(pos); From 6ec979e3a16f78a97688072dc4ecf2aec93fbdab Mon Sep 17 00:00:00 2001 From: Andrew-William-Smith Date: Tue, 24 Sep 2024 00:49:24 +0100 Subject: [PATCH 2/2] Add test for string termination fix --- test/pcre2_tests.ml | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/test/pcre2_tests.ml b/test/pcre2_tests.ml index 65ac7c5..9634aa5 100644 --- a/test/pcre2_tests.ml +++ b/test/pcre2_tests.ml @@ -9,12 +9,28 @@ let simple_test ctxt = NoGroup; Group (2, "u"); Text "ef"] (full_split ~pat:"(x)|(u)" "abxcduef") +let marshalled_string_termination ctxt = + try + (* At the time of writing, the longest error message that can be returned by + PCRE2 is "\g is not followed by a braced, angle-bracketed, or quoted + name/number or by a plain number". *) + ignore @@ regexp "\\gg"; + assert_failure "Invalid pattern must fail to compile." + with Error (BadPattern (msg, offset)) -> + let is_non_printing c = + let codepoint = Char.code c in + codepoint < (Char.code ' ') || codepoint > (Char.code '~') + in + assert_equal offset 2; + assert_bool "PCRE2 string contains non-printing character." + (not @@ String.exists is_non_printing msg) + let suite = "Test pcre" >::: [ - "simple_test" >:: simple_test + "simple_test" >:: simple_test; + "marshalled_string_termination" >:: marshalled_string_termination; ] let _ = if not !Sys.interactive then run_test_tt_main suite else () -