diff --git a/src/pcre2_stubs.c b/src/pcre2_stubs.c index 7cb0e06..47919fe 100644 --- a/src/pcre2_stubs.c +++ b/src/pcre2_stubs.c @@ -248,8 +248,14 @@ static inline void raise_bad_pattern(int code, size_t pos) CAMLparam0(); CAMLlocal1(v_msg); value v_arg; - v_msg = caml_alloc_string(128); - pcre2_get_error_message(code, (PCRE2_UCHAR *)String_val(v_msg), 128); + PCRE2_UCHAR8 err[256] = {0}; + + /* err starts zeroed: pcre2_get_error_message_8 may leave the buffer unwritten + for a code it does not recognise. We can safely assume that PCRE2's + UTF-8-compatible human-intelligible error messages do not contain NUL. */ + pcre2_get_error_message_8(code, err, sizeof(err) / sizeof(err[0])); + v_msg = caml_copy_string((char *)err); + v_arg = caml_alloc_small(2, 0); Field(v_arg, 0) = v_msg; Field(v_arg, 1) = Val_int(pos); diff --git a/test/pcre2_tests.ml b/test/pcre2_tests.ml index 65ac7c5..9634aa5 100644 --- a/test/pcre2_tests.ml +++ b/test/pcre2_tests.ml @@ -9,12 +9,28 @@ let simple_test ctxt = NoGroup; Group (2, "u"); Text "ef"] (full_split ~pat:"(x)|(u)" "abxcduef") +let marshalled_string_termination ctxt = + try + (* At the time of writing, the longest error message that can be returned by + PCRE2 is "\g is not followed by a braced, angle-bracketed, or quoted + name/number or by a plain number". *) + ignore @@ regexp "\\gg"; + assert_failure "Invalid pattern must fail to compile." + with Error (BadPattern (msg, offset)) -> + let is_non_printing c = + let codepoint = Char.code c in + codepoint < (Char.code ' ') || codepoint > (Char.code '~') + in + assert_equal ~printer:string_of_int 2 offset; + assert_bool "PCRE2 string contains non-printing character." + (not @@ String.exists is_non_printing msg) + let suite = "Test pcre" >::: [ - "simple_test" >:: simple_test + "simple_test" >:: simple_test; + "marshalled_string_termination" >:: marshalled_string_termination; ] let _ = if not !Sys.interactive then run_test_tt_main suite else () -