Skip to content

Commit dc8dd28

Browse files
janvorli and jkotas authored
JIT/AOT to interpreter calls support (#116353)
* JIT/AOT to interpreter calls support. Until now, JIT/AOT code could only call interpreted methods that take no arguments and return void. This change enables support for passing all possible kinds of arguments and returning all types. It reuses the `CallStubGenerator` that was implemented for calls from the interpreter to native code and adds support for the other direction to it in a mostly trivial manner. In addition, assembler routines for storing argument values to the interpreter stack were added. The `CallStubGenerator` generates a list of routines that copy the arguments from CPU registers and the stack to the interpreter stack. The last routine calls `ExecuteInterpretedMethod` and then puts the result into the appropriate registers. For functions that return their result via a return buffer, the buffer is passed to `ExecuteInterpretedMethod` so that the IR opcode that returns a valuetype stores it directly into the return buffer. The ARM64 version for Apple OSes is the most optimized one, as it is the primary target where performance matters the most. It eliminates saving the argument registers to the stack on the fast path when the call stub already exists. * Fix m_pInterpThreadContext offset after rebase to main * Fix m_pInterpThreadContext for more targets * Move the CreateNativeToInterpreterCallStub to prestub * Fix build break and add a comment * Create stub only when interp. method called from JIT/AOT code * Unify some asm macros * Update src/coreclr/pal/inc/unixasmmacrosamd64.inc * Fix OSX x64 build --------- Co-authored-by: Jan Kotas <[email protected]>
1 parent 7bbab4f commit dc8dd28

20 files changed

+3529
-377
lines changed

src/coreclr/interpreter/interpretershared.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919

2020
#define INTERP_INDIRECT_HELPER_TAG 1 // When a helper ftn's address is indirect we tag it with this tag bit
2121

22+
struct CallStubHeader;
23+
2224
struct InterpMethod
2325
{
2426
#if DEBUG
@@ -27,6 +29,8 @@ struct InterpMethod
2729
CORINFO_METHOD_HANDLE methodHnd;
2830
int32_t allocaSize;
2931
void** pDataItems;
32+
// This stub is used for calling the interpreted method from JITted/AOTed code
33+
CallStubHeader *pCallStub;
3034
bool initLocals;
3135

3236
InterpMethod(CORINFO_METHOD_HANDLE methodHnd, int32_t allocaSize, void** pDataItems, bool initLocals)
@@ -38,6 +42,7 @@ struct InterpMethod
3842
this->allocaSize = allocaSize;
3943
this->pDataItems = pDataItems;
4044
this->initLocals = initLocals;
45+
pCallStub = NULL;
4146
}
4247

4348
bool CheckIntegrity()

src/coreclr/pal/inc/unixasmmacrosamd64.inc

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,12 @@ C_FUNC(\Name\()_End):
222222

223223
.endm
224224

225+
// Releases 6 * 8 bytes of stack by advancing rsp, without loading anything back
// into registers. EPILOG_WITH_TRANSITION_BLOCK_RETURN uses it where it previously
// used POP_ARGUMENT_REGISTERS.
// NOTE(review): rsp is adjusted with a plain `add` and no CFI directive is emitted
// here; confirm whether unwind info needs a matching CFA adjustment (free_stack
// presumably provides one).
.macro SKIP_ARGUMENT_REGISTERS
226+
227+
add rsp, 6 * 8
228+
229+
.endm
230+
225231
.macro SAVE_FLOAT_ARGUMENT_REGISTERS ofs
226232

227233
save_xmm128_postrsp xmm0, \ofs
@@ -344,7 +350,7 @@ C_FUNC(\Name\()_End):
344350
.macro EPILOG_WITH_TRANSITION_BLOCK_RETURN
345351

346352
free_stack __PWTB_StackAlloc
347-
POP_ARGUMENT_REGISTERS
353+
SKIP_ARGUMENT_REGISTERS
348354
POP_CALLEE_SAVED_REGISTERS
349355
ret
350356

@@ -368,19 +374,29 @@ C_FUNC(\Name\()_End):
368374

369375
.endm
370376

371-
// Inlined version of GetThreadEEAllocContext. Trashes volatile registers.
372-
.macro INLINE_GET_ALLOC_CONTEXT_BASE
373-
#if defined(FEATURE_EMULATED_TLS) || defined(__APPLE__)
374-
call C_FUNC(GetThreadEEAllocContext)
377+
// Obtains the address of the thread-local variable \Var; the users of this macro
// in this file read the result from rax.
// The body is written in AT&T syntax and switches back to Intel syntax before the
// macro ends. Overwrites rdi and performs a call.
// NOTE(review): the call presumably trashes the remaining volatile registers too - confirm.
.macro INLINE_GET_TLS_VAR Var
378+
.att_syntax
379+
#if defined(__APPLE__)
380+
// Apple: load the pointer stored at _\Var@TLVP into rdi, then call through the
// first quadword it points to.
movq _\Var@TLVP(%rip), %rdi
381+
callq *(%rdi)
375382
#else
376-
.att_syntax
377383
// Other targets: padded leaq of \Var@TLSGD into rdi followed by a call to __tls_get_addr.
.byte 0x66 // data16 prefix - padding to have space for linker relaxations
378-
leaq t_runtime_thread_locals@TLSGD(%rip), %rdi
384+
leaq \Var@TLSGD(%rip), %rdi
379385
.byte 0x66 //
380386
.byte 0x66 //
381387
.byte 0x48 // rex.W prefix, also for padding
382388
callq __tls_get_addr@PLT
383-
.intel_syntax noprefix
389+
#endif
390+
// Restore Intel syntax for the code that follows the macro expansion.
.intel_syntax noprefix
391+
.endm
392+
393+
394+
// Inlined version of GetThreadEEAllocContext. Trashes volatile registers.
395+
.macro INLINE_GET_ALLOC_CONTEXT_BASE
396+
#ifdef FEATURE_EMULATED_TLS
397+
call C_FUNC(GetThreadEEAllocContext)
398+
#else
399+
INLINE_GET_TLS_VAR t_runtime_thread_locals
384400

385401
.ifnc OFFSETOF__RuntimeThreadLocals__ee_alloc_context, 0
386402
lea rax, [rax + OFFSETOF__RuntimeThreadLocals__ee_alloc_context]
@@ -405,3 +421,9 @@ C_FUNC(\Name\()_End):
405421
free_stack 56
406422
POP_CALLEE_SAVED_REGISTERS
407423
.endm
424+
425+
// Loads the m_pThread field of the t_CurrentThreadInfo thread-local into rax.
// Clobbers whatever INLINE_GET_TLS_VAR clobbers.
.macro INLINE_GETTHREAD
426+
// Inlined version of call C_FUNC(RhpGetThread)
427+
// rax = address of t_CurrentThreadInfo
INLINE_GET_TLS_VAR t_CurrentThreadInfo
428+
// rax = value stored at offset OFFSETOF__ThreadLocalInfo__m_pThread
mov rax, [rax + OFFSETOF__ThreadLocalInfo__m_pThread]
429+
.endm

src/coreclr/pal/inc/unixasmmacrosarm64.inc

Lines changed: 52 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,7 @@ C_FUNC(\Name\()_End):
184184
// ArgumentRegisters::x2
185185
// ArgumentRegisters::x1
186186
// ArgumentRegisters::x0
187+
// ArgumentRegisters::x8
187188
// FloatRegisters::q7
188189
// FloatRegisters::q6
189190
// FloatRegisters::q5
@@ -192,7 +193,7 @@ C_FUNC(\Name\()_End):
192193
// FloatRegisters::q2
193194
// FloatRegisters::q1
194195
// FloatRegisters::q0
195-
.macro PROLOG_WITH_TRANSITION_BLOCK extraLocals = 0, SaveFPArgs = 1
196+
.macro PROLOG_WITH_TRANSITION_BLOCK extraLocals = 0, SaveFPArgs = 1, SaveGPArgs = 1
196197

197198
__PWTB_FloatArgumentRegisters = \extraLocals
198199
__PWTB_SaveFPArgs = \SaveFPArgs
@@ -222,8 +223,10 @@ C_FUNC(\Name\()_End):
222223
// Allocate space for the rest of the frame
223224
PROLOG_STACK_ALLOC __PWTB_StackAlloc
224225

225-
// Spill argument registers.
226-
SAVE_ARGUMENT_REGISTERS sp, __PWTB_ArgumentRegisters
226+
.if (\SaveGPArgs == 1)
227+
// Spill argument registers.
228+
SAVE_ARGUMENT_REGISTERS sp, __PWTB_ArgumentRegisters
229+
.endif
227230

228231
.if (__PWTB_SaveFPArgs == 1)
229232
SAVE_FLOAT_ARGUMENT_REGISTERS sp, \extraLocals
@@ -301,7 +304,6 @@ C_FUNC(\Name\()_End):
301304

302305
.endm
303306

304-
305307
//-----------------------------------------------------------------------------
306308
// Provides a matching epilog to PROLOG_WITH_TRANSITION_BLOCK and ends by preparing for tail-calling.
307309
// Since this is a tail call argument registers are restored.
@@ -325,6 +327,41 @@ C_FUNC(\Name\()_End):
325327

326328
.endm
327329

330+
// Loads the address of a thread-local variable into the target register,
331+
// which cannot be x0.
332+
// Preserves registers except for xip0 and xip1 on Apple
333+
// NOTE(review): both variants also overwrite x0 (used as scratch) and execute
// `blr`, which writes the link register. INLINE_GET_ALLOC_CONTEXT_BASE saves
// x0 and lr before invoking this macro - confirm other callers do the same.
.macro INLINE_GET_TLS_VAR target, var
334+
// Reject x0 at assembly time: x0 is used as scratch below.
.ifc \target, x0
335+
.error "target cannot be x0"
336+
.endif
337+
338+
// This sequence of instructions is recognized and potentially patched
339+
// by the linker (GD->IE/LE relaxation).
340+
#if defined(__APPLE__)
341+
342+
// Apple: x0 = value loaded from the \var@TLVPPAGE / @TLVPPAGEOFF slot,
// \target = pointer stored at [x0], which is then called.
adrp x0, \var@TLVPPAGE
343+
ldr x0, [x0, \var@TLVPPAGEOFF]
344+
ldr \target, [x0]
345+
346+
blr \target
347+
// End of the sequence
348+
349+
// The call leaves its result in x0; copy it into the target register.
mov \target, x0
350+
#else
351+
// Other targets: TLS descriptor sequence; \target holds the function to call
// and x0 is set up from :tlsdesc_lo12:\var before the call.
adrp x0, :tlsdesc:\var
352+
ldr \target, [x0, #:tlsdesc_lo12:\var]
353+
add x0, x0, :tlsdesc_lo12:\var
354+
.tlsdesccall \var
355+
blr \target
356+
// End of the sequence
357+
358+
// The value left in x0 by the call is added to tpidr_el0 to form the address.
mrs \target, tpidr_el0
359+
add \target, \target, x0
360+
361+
#endif
362+
.endm
363+
364+
328365
// Inlined version of GetThreadEEAllocContext. Target cannot be x0 or x1.
329366
.macro INLINE_GET_ALLOC_CONTEXT_BASE target
330367
.ifc \target, x0
@@ -345,17 +382,7 @@ C_FUNC(\Name\()_End):
345382
EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 0x20
346383
#else
347384
PROLOG_SAVE_REG_PAIR_INDEXED x0, lr, -0x10
348-
349-
// This sequence of instructions is recognized and potentially patched
350-
// by the linker (GD->IE/LE relaxation).
351-
adrp x0, :tlsdesc:t_runtime_thread_locals
352-
ldr \target, [x0, :tlsdesc_lo12:t_runtime_thread_locals]
353-
add x0, x0, :tlsdesc_lo12:t_runtime_thread_locals
354-
blr \target
355-
// End of the sequence
356-
357-
mrs \target, TPIDR_EL0
358-
add \target, \target, x0
385+
INLINE_GET_TLS_VAR \target, t_runtime_thread_locals
359386

360387
.ifnc OFFSETOF__RuntimeThreadLocals__ee_alloc_context, 0
361388
add \target, x0, OFFSETOF__RuntimeThreadLocals__ee_alloc_context
@@ -470,3 +497,13 @@ $__RedirectionStubEndFuncName
470497
0:
471498
#endif
472499
.endm
500+
501+
// Symbolic aliases for registers x16, x17 and x18.
// NOTE(review): presumably the IP0/IP1 intra-procedure-call scratch registers and
// the platform register - confirm against the ABI in use.
#define xip0 x16
502+
#define xip1 x17
503+
#define xpr x18
504+
505+
// Inlined version of RhpGetThread. Target cannot be x0.
506+
// Obtains the address of the t_CurrentThreadInfo thread-local via INLINE_GET_TLS_VAR
// and then loads the value at offset OFFSETOF__ThreadLocalInfo__m_pThread into \target.
// Clobbers whatever INLINE_GET_TLS_VAR clobbers.
.macro INLINE_GETTHREAD target
507+
INLINE_GET_TLS_VAR \target, C_FUNC(t_CurrentThreadInfo)
508+
ldr \target, [\target, #OFFSETOF__ThreadLocalInfo__m_pThread]
509+
.endm

0 commit comments

Comments
 (0)