100100 labels_count :: pos_integer (),
101101 atom_resolver :: fun ((integer ()) -> atom ()),
102102 literal_resolver :: fun ((integer ()) -> any ()),
103- type_resolver :: fun ((integer ()) -> any ())
103+ type_resolver :: fun ((integer ()) -> any ()),
104+ tail_cache :: [{tuple (), non_neg_integer ()}]
104105}).
105106
106107-type stream () :: any ().
@@ -142,7 +143,8 @@ compile(
142143 labels_count = LabelsCount ,
143144 atom_resolver = AtomResolver ,
144145 literal_resolver = LiteralResolver ,
145- type_resolver = TypeResolver
146+ type_resolver = TypeResolver ,
147+ tail_cache = []
146148 },
147149 {State1 , MSt2 } = first_pass (Opcodes , MMod , MSt1 , State0 ),
148150 MSt3 = second_pass (MMod , MSt2 , State1 ),
@@ -170,18 +172,30 @@ first_pass(
170172 ? ASSERT_ALL_NATIVE_FREE (MSt1 ),
171173 first_pass (Rest1 , MMod , MSt1 , State0 );
172174% 2
% OP_FUNC_INFO: emit the function_clause error stub for the function whose
% func_info header is decoded here.
%
% The stub loads its own code offset into a native register and then tail-calls
% ?PRIM_RAISE_ERROR with ?FUNCTION_CLAUSE_ATOM. The tail call is shared between
% stubs through the tail cache: the first stub emits it, and later stubs that
% were given the same register jump to it instead of emitting it again.
first_pass(<<?OP_FUNC_INFO, Rest0/binary>>, MMod, MSt0, #state{tail_cache = TC} = State0) ->
    ?ASSERT_ALL_NATIVE_FREE(MSt0),
    {_ModuleAtom, Rest1} = decode_atom(Rest0),
    {_FunctionName, Rest2} = decode_atom(Rest1),
    {_Arity, Rest3} = decode_literal(Rest2),
    ?TRACE(" OP_FUNC_INFO ~p , ~p , ~p \n ", [_ModuleAtom, _FunctionName, _Arity]),
    % Implement function clause at the previous label.
    Offset = MMod:offset(MSt0),
    {MSt1, OffsetReg} = MMod:move_to_native_register(MSt0, Offset),
    % The shareable tail starts AFTER the register load. Caching the offset
    % taken before the load would make a cache hit jump back onto the first
    % stub's load, which would overwrite OffsetReg with that stub's offset.
    % NOTE(review): assumes move_to_native_register emits the load at the
    % current stream offset -- confirm against the backend modules.
    TailOffset = MMod:offset(MSt1),
    % The key carries the register, not the offset value, so stubs of
    % different functions can share one tail.
    TailCacheKey = {call_primitive_last, ?PRIM_RAISE_ERROR, [OffsetReg, ?FUNCTION_CLAUSE_ATOM]},
    {MSt3, State1} =
        case lists:keyfind(TailCacheKey, 1, TC) of
            false ->
                % First stub using this register: emit the tail call and
                % remember where it starts.
                MStRaise = MMod:call_primitive_last(MSt1, ?PRIM_RAISE_ERROR, [
                    ctx, jit_state, {free, OffsetReg}, ?FUNCTION_CLAUSE_ATOM
                ]),
                {MStRaise, State0#state{tail_cache = [{TailCacheKey, TailOffset} | TC]}};
            {TailCacheKey, CachedTailOffset} ->
                % Tail already emitted: jump to it, then release the register
                % in the compile-time state (the cached tail call was emitted
                % with {free, OffsetReg}).
                MStJump = MMod:jump_to_offset(MSt1, CachedTailOffset),
                {MMod:free_native_registers(MStJump, [OffsetReg]), State0}
        end,
    ?ASSERT_ALL_NATIVE_FREE(MSt3),
    first_pass(Rest3, MMod, MSt3, State1);
185199% 3
186200first_pass (
187201 <<? OP_INT_CALL_END >>, MMod , MSt0 , # state {labels_count = LabelsCount } = State
@@ -203,26 +217,56 @@ first_pass(<<?OP_CALL, Rest0/binary>>, MMod, MSt0, State0) ->
203217 ? ASSERT_ALL_NATIVE_FREE (MSt1 ),
204218 first_pass (Rest2 , MMod , MSt1 , State0 );
205219% 5
% OP_CALL_LAST: tail call to a local label after dropping the stack frame.
%
% Two tail-cache levels are consulted:
%   {op_call_last, NWords, Label} -- the whole sequence (restore CP, pop the
%                                    frame, call), reused with a single jump;
%   {op_call_only, Label}         -- only the final call, reused after this
%                                    clause has emitted its own frame teardown.
first_pass(<<?OP_CALL_LAST, Rest0/binary>>, MMod, MSt0, #state{tail_cache = TC} = State0) ->
    ?ASSERT_ALL_NATIVE_FREE(MSt0),
    {_Arity, Rest1} = decode_literal(Rest0),
    {Label, Rest2} = decode_label(Rest1),
    {NWords, Rest3} = decode_literal(Rest2),
    ?TRACE(" OP_CALL_LAST ~p , ~p , ~p \n ", [_Arity, Label, NWords]),
    CallLastKey = {op_call_last, NWords, Label},
    {MSt3, State1} =
        case lists:keyfind(CallLastKey, 1, TC) of
            {CallLastKey, CachedSeqOffset} ->
                % Identical sequence already emitted: jump straight to it.
                {MMod:jump_to_offset(MSt0, CachedSeqOffset), State0};
            false ->
                % Record where this sequence starts before emitting it.
                SeqOffset = MMod:offset(MSt0),
                MSt1 = MMod:move_to_cp(MSt0, {y_reg, NWords}),
                MSt2 = MMod:increment_sp(MSt1, NWords + 1),
                CallOnlyKey = {op_call_only, Label},
                case lists:keyfind(CallOnlyKey, 1, TC) of
                    {CallOnlyKey, CachedCallOffset} ->
                        % The call itself exists already; only the full
                        % sequence is new to the cache.
                        {
                            MMod:jump_to_offset(MSt2, CachedCallOffset),
                            State0#state{tail_cache = [{CallLastKey, SeqOffset} | TC]}
                        };
                    false ->
                        CallOffset = MMod:offset(MSt2),
                        {
                            MMod:call_only_or_schedule_next(MSt2, Label),
                            State0#state{
                                tail_cache = [
                                    {CallOnlyKey, CallOffset}, {CallLastKey, SeqOffset} | TC
                                ]
                            }
                        }
                end
        end,
    ?ASSERT_ALL_NATIVE_FREE(MSt3),
    first_pass(Rest3, MMod, MSt3, State1);
217252% 6
% OP_CALL_ONLY: tail call to a local label with no stack frame to drop.
% The emitted call is recorded in the tail cache under {op_call_only, Label};
% a later request for the same label becomes a jump to that code.
first_pass(<<?OP_CALL_ONLY, Rest0/binary>>, MMod, MSt0, #state{tail_cache = TC} = State0) ->
    ?ASSERT_ALL_NATIVE_FREE(MSt0),
    {_Arity, Rest1} = decode_literal(Rest0),
    {Label, Rest2} = decode_label(Rest1),
    ?TRACE(" OP_CALL_ONLY ~p , ~p \n ", [_Arity, Label]),
    CallOnlyKey = {op_call_only, Label},
    {MSt1, State1} =
        case lists:keyfind(CallOnlyKey, 1, TC) of
            {CallOnlyKey, CachedOffset} ->
                % Already emitted for this label: reuse it.
                {MMod:jump_to_offset(MSt0, CachedOffset), State0};
            false ->
                % Record the start offset, emit the call, cache it.
                CallOffset = MMod:offset(MSt0),
                {
                    MMod:call_only_or_schedule_next(MSt0, Label),
                    State0#state{tail_cache = [{CallOnlyKey, CallOffset} | TC]}
                }
        end,
    ?ASSERT_ALL_NATIVE_FREE(MSt1),
    first_pass(Rest2, MMod, MSt1, State1);
226270% 7
227271first_pass (<<? OP_CALL_EXT , Rest0 /binary >>, MMod , MSt0 , State0 ) ->
228272 ? ASSERT_ALL_NATIVE_FREE (MSt0 ),
@@ -348,7 +392,7 @@ first_pass(<<?OP_DEALLOCATE, Rest0/binary>>, MMod, MSt0, State0) ->
348392 ? ASSERT_ALL_NATIVE_FREE (MSt2 ),
349393 first_pass (Rest1 , MMod , MSt2 , State0 );
350394% 19
351- first_pass (<<? OP_RETURN , Rest /binary >>, MMod , MSt0 , State0 ) ->
395+ first_pass (<<? OP_RETURN , Rest /binary >>, MMod , MSt0 , # state { tail_cache = TC } = State0 ) ->
352396 ? ASSERT_ALL_NATIVE_FREE (MSt0 ),
353397 ? TRACE (" OP_RETURN\n " , []),
354398 % Optimized return: check if returning within same module
@@ -371,9 +415,18 @@ first_pass(<<?OP_RETURN, Rest/binary>>, MMod, MSt0, State0) ->
371415 ),
372416 MSt5 = MMod :free_native_registers (MSt4 , [CpReg0 ]),
373417 % Different module: use existing slow path
374- MSt6 = MMod :call_primitive_last (MSt5 , ? PRIM_RETURN , [ctx , jit_state ]),
418+ TailCacheKey = {call_primitive_last , ? PRIM_RETURN },
419+ case lists :keyfind (TailCacheKey , 1 , TC ) of
420+ false ->
421+ Offset = MMod :offset (MSt5 ),
422+ MSt6 = MMod :call_primitive_last (MSt5 , ? PRIM_RETURN , [ctx , jit_state ]),
423+ State1 = State0 # state {tail_cache = [{TailCacheKey , Offset } | TC ]};
424+ {TailCacheKey , Offset } ->
425+ MSt6 = MMod :jump_to_offset (MSt5 , Offset ),
426+ State1 = State0
427+ end ,
375428 ? ASSERT_ALL_NATIVE_FREE (MSt6 ),
376- first_pass (Rest , MMod , MSt6 , State0 );
429+ first_pass (Rest , MMod , MSt6 , State1 );
377430% 20
378431first_pass (<<? OP_SEND , Rest /binary >>, MMod , MSt0 , State0 ) ->
379432 ? ASSERT_ALL_NATIVE_FREE (MSt0 ),
@@ -836,13 +889,22 @@ first_pass(<<?OP_SELECT_TUPLE_ARITY, Rest0/binary>>, MMod, MSt0, State0) ->
836889 ? ASSERT_ALL_NATIVE_FREE (MSt5 ),
837890 first_pass (Rest4 , MMod , MSt5 , State0 );
838891% 61
% OP_JUMP: unconditional jump to a local label.
% It is emitted with call_only_or_schedule_next and uses the same
% {op_call_only, Label} tail-cache key as the OP_CALL_ONLY clause, so code
% emitted for a label by either opcode can be reused by the other.
first_pass(<<?OP_JUMP, Rest0/binary>>, MMod, MSt0, #state{tail_cache = TC} = State0) ->
    ?ASSERT_ALL_NATIVE_FREE(MSt0),
    {Label, Rest1} = decode_label(Rest0),
    ?TRACE(" OP_JUMP ~p \n ", [Label]),
    CallOnlyKey = {op_call_only, Label},
    {MSt1, State1} =
        case lists:keyfind(CallOnlyKey, 1, TC) of
            {CallOnlyKey, CachedOffset} ->
                % Code for this label already exists: jump to it.
                {MMod:jump_to_offset(MSt0, CachedOffset), State0};
            false ->
                % Record the start offset, emit the call, cache it.
                CallOffset = MMod:offset(MSt0),
                {
                    MMod:call_only_or_schedule_next(MSt0, Label),
                    State0#state{tail_cache = [{CallOnlyKey, CallOffset} | TC]}
                }
        end,
    ?ASSERT_ALL_NATIVE_FREE(MSt1),
    first_pass(Rest1, MMod, MSt1, State1);
846908% 62
847909% Same implementation as OP_TRY, to confirm.
848910first_pass (<<? OP_CATCH , Rest0 /binary >>, MMod , MSt0 , State0 ) ->
0 commit comments