Skip to content

Commit 83a8c4c

Browse files
committed
refactor
1 parent e0f5de3 commit 83a8c4c

File tree

13 files changed

+228
-177
lines changed

13 files changed

+228
-177
lines changed

compiler/ext/ext_embed.ml

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,45 @@ let get_embed_tag (name : string) : string option =
44
if String.length name > plen && String.sub name 0 plen = prefix then
55
Some (String.sub name plen (String.length name - plen))
66
else None
7+
8+
let is_valid_embed_id (s : string) : bool =
9+
let len = String.length s in
10+
if len = 0 then false
11+
else
12+
let lead = s.[0] in
13+
let is_letter = function
14+
| 'A' .. 'Z' | 'a' .. 'z' -> true
15+
| _ -> false
16+
in
17+
let is_ident_char = function
18+
| 'A' .. 'Z' | 'a' .. 'z' | '0' .. '9' | '_' -> true
19+
| _ -> false
20+
in
21+
if not (is_letter lead) then false
22+
else
23+
let rec loop i =
24+
if i >= len then true
25+
else if is_ident_char s.[i] then loop (i + 1)
26+
else false
27+
in
28+
loop 1
29+
30+
let invalid_id_error_message =
31+
"Invalid `id` for embed. Embed `id` must start with a letter, and only \
32+
contain letters, digits, and underscores."
33+
34+
let missing_id_error_message = "Embed config record must include `id: string`."
35+
36+
let invalid_payload_error_message =
37+
"Embed payload must be either a string literal or a record literal."
38+
39+
let normalize_tag_for_symbol (tag : string) : string =
40+
(* Embed tags are already validated by the parser as extension identifiers
41+
(attr-id with optional dot-separated segments). We only need to make the
42+
tag segment safe for inclusion in a single identifier by mapping '.' to
43+
'_'. *)
44+
let b = Bytes.of_string tag in
45+
for i = 0 to Bytes.length b - 1 do
46+
if Bytes.get b i = '.' then Bytes.set b i '_'
47+
done;
48+
Bytes.unsafe_to_string b

compiler/ext/ext_embed.mli

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,21 @@
11
val get_embed_tag : string -> string option
22
(** [get_embed_tag name] returns [Some base] when [name] starts with
33
the embed prefix "embed." and has a non-empty remainder; otherwise [None]. *)
4+
5+
val is_valid_embed_id : string -> bool
6+
(** Validate embed `id`: must start with a letter and contain only
7+
letters, digits, and underscores. *)
8+
9+
val invalid_id_error_message : string
10+
(** Centralized error message for invalid embed `id`. *)
11+
12+
val missing_id_error_message : string
13+
(** Error when a config record omits `id` or provides a non-string `id`. *)
14+
15+
val invalid_payload_error_message : string
16+
(** Error when embed payload is not a string literal or record literal. *)
17+
18+
19+
val normalize_tag_for_symbol : string -> string
20+
(** Convert an embed tag (validated as an attribute id) into a safe fragment
21+
for inclusion in a single identifier, by replacing '.' with '_'. *)

compiler/frontend/dune

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,4 @@
33
(wrapped false)
44
(flags
55
(:standard -w +a-4-9-40-42-70))
6-
(libraries common ml unix))
6+
(libraries common ml))

compiler/frontend/embed_index.ml

Lines changed: 64 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -58,14 +58,46 @@ let payload_to_data (payload : Ast_payload.t) :
5858
match e.pexp_desc with
5959
| Pexp_constant (Pconst_string (txt, _)) ->
6060
Some (Ext_json_noloc.str txt, e.pexp_loc)
61-
| _ -> (
61+
| Pexp_record _ -> (
6262
match expr_to_json e with
6363
| Some json -> Some (json, e.pexp_loc)
64-
| None -> None))
64+
| None -> None)
65+
| _ -> None)
6566
| _ -> None
6667

68+
let validate_id_in_payload (payload : Ast_payload.t) : unit =
69+
match payload with
70+
| PStr [{pstr_desc = Pstr_eval (e, _attrs); _}] -> (
71+
match e.pexp_desc with
72+
| Pexp_record (fields, None) ->
73+
let found = ref false in
74+
let rec find = function
75+
| [] ->
76+
if not !found then
77+
Location.raise_errorf ~loc:e.pexp_loc "%s"
78+
Ext_embed.missing_id_error_message
79+
| ({lid; x = v; _} : Parsetree.expression Parsetree.record_element)
80+
:: rest ->
81+
let name = String.concat "." (Longident.flatten lid.txt) in
82+
if name = "id" then
83+
match v.pexp_desc with
84+
| Pexp_constant (Pconst_string (s, _)) ->
85+
found := true;
86+
if not (Ext_embed.is_valid_embed_id s) then
87+
Location.raise_errorf ~loc:v.pexp_loc "%s"
88+
Ext_embed.invalid_id_error_message
89+
| _ ->
90+
Location.raise_errorf ~loc:v.pexp_loc "%s"
91+
Ext_embed.missing_id_error_message
92+
else find rest
93+
in
94+
find fields
95+
| _ -> ())
96+
| _ -> ()
97+
6798
let write_structure_index ~outprefix ~sourcefile (ast : structure) : unit =
6899
if is_enabled () then (
100+
let modulename = Ext_filename.module_name outprefix in
69101
let entries = ref [] in
70102
let counts : (string, int) Hashtbl.t = Hashtbl.create 7 in
71103
let bump tag =
@@ -87,10 +119,29 @@ let write_structure_index ~outprefix ~sourcefile (ast : structure) : unit =
87119
| _ -> Ext_json_noloc.to_string data
88120
in
89121
let literal_hash = csv_hash tag data_str in
122+
let tag_normalized = Ext_embed.normalize_tag_for_symbol tag in
123+
let suffix =
124+
match data with
125+
| Ext_json_noloc.Str _ -> string_of_int occurrence_index
126+
| Ext_json_noloc.Obj map -> (
127+
match Map_string.find_opt map "id" with
128+
| Some (Ext_json_noloc.Str s) -> s
129+
| _ ->
130+
(* Should be prevented by earlier validation *)
131+
Location.raise_errorf ~loc "%s"
132+
Ext_embed.missing_id_error_message)
133+
| _ ->
134+
Location.raise_errorf ~loc "%s"
135+
Ext_embed.invalid_payload_error_message
136+
in
137+
let target_module =
138+
Printf.sprintf "%s__embed_%s_%s" modulename tag_normalized suffix
139+
in
90140
let entry =
91141
Ext_json_noloc.kvs
92142
[
93143
("tag", Ext_json_noloc.str tag);
144+
("targetModule", Ext_json_noloc.str target_module);
94145
("context", Ext_json_noloc.str context);
95146
( "occurrenceIndex",
96147
Ext_json_noloc.flo (string_of_int occurrence_index) );
@@ -101,7 +152,7 @@ let write_structure_index ~outprefix ~sourcefile (ast : structure) : unit =
101152
in
102153
entries := entry :: !entries
103154
in
104-
let normalize_tag (tag : string) : string =
155+
let base_tag_of_extension (tag : string) : string =
105156
match Ext_embed.get_embed_tag tag with
106157
| Some t -> t
107158
| None -> tag
@@ -124,19 +175,18 @@ let write_structure_index ~outprefix ~sourcefile (ast : structure) : unit =
124175
(fun self m ->
125176
(match m.pmod_desc with
126177
| Pmod_extension ({txt = tag; _}, payload) ->
127-
let base_tag = normalize_tag tag in
128-
if should_collect_tag base_tag then
178+
let base_tag = base_tag_of_extension tag in
179+
if should_collect_tag base_tag then (
180+
validate_id_in_payload payload;
129181
match payload_to_data payload with
130182
| Some (data, loc) ->
131183
let context =
132184
Option.value ~default:"module" !current_mod_context
133185
in
134186
add_entry ~tag:base_tag ~context ~data ~loc
135187
| None ->
136-
Location.raise_errorf ~loc:m.pmod_loc
137-
"%%%s expects a string literal or a JSON-serializable \
138-
record literal"
139-
tag
188+
Location.raise_errorf ~loc:m.pmod_loc "%s"
189+
Ext_embed.invalid_payload_error_message)
140190
else ()
141191
| _ -> ());
142192
let prev = !current_mod_context in
@@ -161,16 +211,15 @@ let write_structure_index ~outprefix ~sourcefile (ast : structure) : unit =
161211
(fun self e ->
162212
(match e.pexp_desc with
163213
| Pexp_extension ({txt = tag; _}, payload) ->
164-
let base_tag = normalize_tag tag in
165-
if should_collect_tag base_tag then
214+
let base_tag = base_tag_of_extension tag in
215+
if should_collect_tag base_tag then (
216+
validate_id_in_payload payload;
166217
match payload_to_data payload with
167218
| Some (data, loc) ->
168219
add_entry ~tag:base_tag ~context:"expr" ~data ~loc
169220
| None ->
170-
Location.raise_errorf ~loc:e.pexp_loc
171-
"%%%s expects a string literal or a JSON-serializable \
172-
record literal"
173-
tag
221+
Location.raise_errorf ~loc:e.pexp_loc "%s"
222+
Ext_embed.invalid_payload_error_message)
174223
else ()
175224
| _ -> ());
176225
default_it.expr self e);
@@ -180,7 +229,6 @@ let write_structure_index ~outprefix ~sourcefile (ast : structure) : unit =
180229
let entries_json =
181230
!entries |> List.rev |> Array.of_list |> Ext_json_noloc.arr
182231
in
183-
let modulename = Ext_filename.module_name outprefix in
184232
let source_path = sourcefile in
185233
let json =
186234
Ext_json_noloc.kvs

compiler/frontend/embed_ppx.ml

Lines changed: 28 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -1,48 +1,7 @@
11
open Parsetree
22

3-
let normalize_tag (tag : string) : string =
4-
let buf = Bytes.create (String.length tag) in
5-
let j = ref 0 in
6-
String.iter
7-
(fun c ->
8-
let c' =
9-
if
10-
(Char.code c >= 48 && Char.code c <= 57)
11-
|| (Char.code c >= 65 && Char.code c <= 90)
12-
|| (Char.code c >= 97 && Char.code c <= 122)
13-
then c
14-
else '_'
15-
in
16-
Bytes.unsafe_set buf !j c';
17-
incr j)
18-
tag;
19-
Bytes.sub_string buf 0 !j
20-
213
let get_module_name () = Ext_filename.module_name !Location.input_name
224

23-
let sanitize_suffix (s : string) : string =
24-
let buf = Buffer.create (String.length s) in
25-
let prev_underscore = ref false in
26-
String.iter
27-
(fun ch ->
28-
let c =
29-
match ch with
30-
| 'A' .. 'Z' | 'a' .. 'z' | '0' .. '9' -> Some ch
31-
| _ -> Some '_'
32-
in
33-
match c with
34-
| Some '_' ->
35-
if not !prev_underscore then (
36-
Buffer.add_char buf '_';
37-
prev_underscore := true)
38-
| Some c ->
39-
Buffer.add_char buf c;
40-
prev_underscore := false
41-
| None -> ())
42-
s;
43-
let out = Buffer.contents buf in
44-
if out = "" then "1" else out
45-
465
let payload_expr (payload : Ast_payload.t) : expression option =
476
match payload with
487
| PStr [{pstr_desc = Pstr_eval (e, _attrs); _}] -> Some e
@@ -79,6 +38,32 @@ let rewrite (ast : structure) : structure =
7938
in
8039
let module_name = get_module_name () in
8140

41+
let suffix_from_payload_expr ~base_tag ~bump (e : expression) : string =
42+
match e.pexp_desc with
43+
| Pexp_constant (Pconst_string (_, _)) ->
44+
(* String payload: no config id, use occurrence index *)
45+
string_of_int (bump base_tag)
46+
| Pexp_record (_, None) -> (
47+
match get_config_id e with
48+
| Some id ->
49+
if Ext_embed.is_valid_embed_id id then id
50+
else
51+
Location.raise_errorf ~loc:e.pexp_loc "%s"
52+
Ext_embed.invalid_id_error_message
53+
| None ->
54+
Location.raise_errorf ~loc:e.pexp_loc "%s"
55+
Ext_embed.missing_id_error_message)
56+
| _ ->
57+
Location.raise_errorf ~loc:e.pexp_loc "%s"
58+
Ext_embed.invalid_payload_error_message
59+
in
60+
61+
let target_for ~module_name ~base_tag ~bump (e : expression) : string =
62+
let tag_norm = Ext_embed.normalize_tag_for_symbol base_tag in
63+
let suffix = suffix_from_payload_expr ~base_tag ~bump e in
64+
Printf.sprintf "%s__embed_%s_%s" module_name tag_norm suffix
65+
in
66+
8267
let module_expr (self : Ast_mapper.mapper) (m : module_expr) : module_expr =
8368
match m.pmod_desc with
8469
| Pmod_extension ({txt = tag; _}, payload) -> (
@@ -89,15 +74,7 @@ let rewrite (ast : structure) : structure =
8974
match payload_expr payload with
9075
| None -> Ast_mapper.default_mapper.module_expr self m
9176
| Some e ->
92-
let tag_norm = normalize_tag base_tag in
93-
let suffix =
94-
match get_config_id e with
95-
| Some id -> sanitize_suffix id
96-
| None -> string_of_int (bump base_tag)
97-
in
98-
let target =
99-
Printf.sprintf "%s__embed_%s_%s" module_name tag_norm suffix
100-
in
77+
let target = target_for ~module_name ~base_tag ~bump e in
10178
Ast_helper.Mod.ident ~loc:m.pmod_loc
10279
{txt = Longident.Lident target; loc = m.pmod_loc}))
10380
| _ -> Ast_mapper.default_mapper.module_expr self m
@@ -112,15 +89,7 @@ let rewrite (ast : structure) : structure =
11289
match payload_expr payload with
11390
| None -> Ast_mapper.default_mapper.expr self e
11491
| Some ex ->
115-
let tag_norm = normalize_tag base_tag in
116-
let suffix =
117-
match get_config_id ex with
118-
| Some id -> sanitize_suffix id
119-
| None -> string_of_int (bump base_tag)
120-
in
121-
let target =
122-
Printf.sprintf "%s__embed_%s_%s" module_name tag_norm suffix
123-
in
92+
let target = target_for ~module_name ~base_tag ~bump ex in
12493
Ast_helper.Exp.ident ~loc:e.pexp_loc
12594
{
12695
txt = Longident.Ldot (Longident.Lident target, "default");

compiler/frontend/ppx_entry.ml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,6 @@ let rewrite_implementation (ast : Parsetree.structure) : Parsetree.structure =
5555
let jsx_module = string_of_jsx_module !jsx_module in
5656
Jsx_ppx.rewrite_implementation ~jsx_version ~jsx_module ast
5757
in
58-
(* Embed rewrite: single-pass PPX that maps ::embed nodes to generated modules *)
5958
let ast = Embed_ppx.rewrite_implementation ast in
6059
if !Js_config.no_builtin_ppx then ast
6160
else

0 commit comments

Comments
 (0)