Skip to content

Commit d8eb428

Browse files
committed
Fix code generation for emojis in polyvars and labels
1 parent 553bd9c commit d8eb428

File tree

7 files changed

+105
-23
lines changed

7 files changed

+105
-23
lines changed

compiler/core/js_dump_string.ml

Lines changed: 71 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@
2424

2525
module P = Ext_pp
2626

27+
open Ext_utf8
28+
2729
(** Avoid allocating single-character strings too many times *)
2830
let array_str1 = Array.init 256 (fun i -> String.make 1 (Char.chr i))
2931

@@ -56,40 +58,86 @@ let ( +> ) = Ext_buffer.add_string
5658
let escape_to_buffer f (* ?(utf=false)*) s =
5759
let pp_raw_string f (* ?(utf=false)*) s =
5860
let l = String.length s in
59-
for i = 0 to l - 1 do
60-
let c = String.unsafe_get s i in
61+
let i = ref 0 in
62+
while !i < l do
63+
let c = String.unsafe_get s !i in
6164
match c with
62-
| '\b' -> f +> "\\b"
63-
| '\012' -> f +> "\\f"
64-
| '\n' -> f +> "\\n"
65-
| '\r' -> f +> "\\r"
66-
| '\t' -> f +> "\\t"
65+
| '\b' ->
66+
f +> "\\b";
67+
incr i
68+
| '\012' ->
69+
f +> "\\f";
70+
incr i
71+
| '\n' ->
72+
f +> "\\n";
73+
incr i
74+
| '\r' ->
75+
f +> "\\r";
76+
incr i
77+
| '\t' ->
78+
f +> "\\t";
79+
incr i
6780
(* This escape sequence is not supported by IE < 9
6881
| '\011' -> "\\v"
69-
IE < 9 treats '\v' as 'v' instead of a vertical tab ('\x0B').
70-
If cross-browser compatibility is a concern, use \x0B instead of \v.
82+
IE < 9 treats '\v' as 'v' instead of a vertical tab ('\x0B').
83+
If cross-browser compatibility is a concern, use \x0B instead of \v.
7184
72-
Another thing to note is that the \v and \0 escapes are not allowed in JSON strings.
73-
*)
85+
Another thing to note is that the \v and \0 escapes are not allowed in JSON strings.
86+
*)
7487
| '\000'
75-
when i = l - 1
88+
when !i = l - 1
7689
||
77-
let next = String.unsafe_get s (i + 1) in
90+
let next = String.unsafe_get s (!i + 1) in
7891
next < '0' || next > '9' ->
79-
f +> "\\0"
80-
| '\\' (* when not utf*) -> f +> "\\\\"
92+
f +> "\\0";
93+
incr i
94+
| '\\' (* when not utf*) ->
95+
f +> "\\\\";
96+
incr i
8197
| '\000' .. '\031' | '\127' ->
8298
let c = Char.code c in
8399
f +> "\\x";
84100
f +> Array.unsafe_get array_conv (c lsr 4);
85-
f +> Array.unsafe_get array_conv (c land 0xf)
86-
| '\128' .. '\255' (* when not utf*) ->
87-
let c = Char.code c in
88-
f +> "\\x";
89-
f +> Array.unsafe_get array_conv (c lsr 4);
90-
f +> Array.unsafe_get array_conv (c land 0xf)
91-
| '\"' -> f +> "\\\"" (* quote*)
92-
| _ -> f +> Array.unsafe_get array_str1 (Char.code c)
101+
f +> Array.unsafe_get array_conv (c land 0xf);
102+
incr i
103+
| '\128' .. '\255' -> (
104+
(* Check if this is part of a valid UTF-8 sequence *)
105+
let utf8_byte = classify c in
106+
match utf8_byte with
107+
| Single _ ->
108+
(* Single byte >= 128, escape it *)
109+
let c = Char.code c in
110+
f +> "\\x";
111+
f +> Array.unsafe_get array_conv (c lsr 4);
112+
f +> Array.unsafe_get array_conv (c land 0xf);
113+
incr i
114+
| Leading (n, _) ->
115+
(* Start of UTF-8 sequence, output the whole sequence as-is *)
116+
let rec output_utf8_sequence pos remaining =
117+
if remaining > 0 && pos < l then (
118+
let byte = String.unsafe_get s pos in
119+
f +> Array.unsafe_get array_str1 (Char.code byte);
120+
output_utf8_sequence (pos + 1) (remaining - 1))
121+
in
122+
output_utf8_sequence !i (n + 1);
123+
(* Advance past the whole sequence just emitted: the leading byte plus its n following bytes *)
124+
i := !i + n + 1
125+
| Cont _ ->
126+
(* Stray continuation byte: those following a leading byte are consumed by the Leading case, so this one is skipped without emitting anything *)
127+
incr i
128+
| Invalid ->
129+
(* Invalid UTF-8 byte, escape it *)
130+
let c = Char.code c in
131+
f +> "\\x";
132+
f +> Array.unsafe_get array_conv (c lsr 4);
133+
f +> Array.unsafe_get array_conv (c land 0xf);
134+
incr i)
135+
| '\"' ->
136+
f +> "\\\"";
137+
incr i (* quote*)
138+
| _ ->
139+
f +> Array.unsafe_get array_str1 (Char.code c);
140+
incr i
93141
done
94142
in
95143
f +> "\"";

tests/syntax_tests/data/printer/expr/exoticIdent.res

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,3 +62,9 @@ let dict = {
6262
\"key": 42,
6363
\"KEY": 42,
6464
}
65+
66+
type t = {\"🎉": int}
67+
68+
let x = {
69+
\"🎉": 42,
70+
}

tests/syntax_tests/data/printer/expr/expected/exoticIdent.res.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,3 +74,9 @@ let dict = {
7474
key: 42,
7575
\"KEY": 42,
7676
}
77+
78+
type t = {\"🎉": int}
79+
80+
let x = {
81+
\"🎉": 42,
82+
}

tests/tests/src/exotic_labels_test.mjs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,13 @@ let dict = {
1010
KEY: 1
1111
};
1212

13+
let x = {
14+
"🎉": 42
15+
};
16+
1317
export {
1418
fn1,
1519
dict,
20+
x,
1621
}
1722
/* No side effect */

tests/tests/src/exotic_labels_test.res

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,9 @@ let dict = {
1616
key: 1,
1717
\"KEY": 1,
1818
}
19+
20+
type t = {\"🎉": int}
21+
22+
let x = {
23+
\"🎉": 42,
24+
}

tests/tests/src/poly_variant_test.mjs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,10 @@ eq("File \"poly_variant_test.res\", line 161, characters 5-12", 3, p_is_int_test
9696
VAL: 2
9797
}));
9898

99+
eq("File \"poly_variant_test.res\", line 183, characters 5-12", "🚀", "🚀");
100+
101+
eq("File \"poly_variant_test.res\", line 184, characters 5-12", "🔥", "🔥");
102+
99103
Mt.from_pair_suites("Poly_variant_test", suites.contents);
100104

101105
/* Not a pure module */

tests/tests/src/poly_variant_test.res

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,4 +177,11 @@ let hey = x =>
177177
Js.log(v)
178178
}
179179

180+
type t = [#"🚀" | #"🔥"]
181+
182+
let () = {
183+
eq(__LOC__, "🚀", (#"🚀": t :> string))
184+
eq(__LOC__, "🔥", (#"🔥": t :> string))
185+
}
186+
180187
let () = Mt.from_pair_suites(__MODULE__, suites.contents)

0 commit comments

Comments
 (0)