From 6fd39894656ae93ecb07084346a5b7480828ca4c Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Fri, 14 Feb 2025 18:36:43 +0200 Subject: [PATCH 01/16] Add util code for working with IL opcodes The opcode name, its argument type and a method for decoding the opcode enum value from IL byte stream. --- src/coreclr/interpreter/compiler.cpp | 2 -- src/coreclr/interpreter/intops.cpp | 51 ++++++++++++++++++++++++++++ src/coreclr/interpreter/intops.h | 4 +++ 3 files changed, 55 insertions(+), 2 deletions(-) diff --git a/src/coreclr/interpreter/compiler.cpp b/src/coreclr/interpreter/compiler.cpp index e5d3648306bf23..2489753003ac3e 100644 --- a/src/coreclr/interpreter/compiler.cpp +++ b/src/coreclr/interpreter/compiler.cpp @@ -2,8 +2,6 @@ // The .NET Foundation licenses this file to you under the MIT license. #include "interpreter.h" -#include "openum.h" - static const StackType g_stackTypeFromInterpType[] = { StackTypeI4, // I1 diff --git a/src/coreclr/interpreter/intops.cpp b/src/coreclr/interpreter/intops.cpp index 7f94eb4a363c2a..aaed470714b9a7 100644 --- a/src/coreclr/interpreter/intops.cpp +++ b/src/coreclr/interpreter/intops.cpp @@ -60,3 +60,54 @@ const char* InterpOpName(int op) return ((const char*)&g_interpOpNameCharacters) + g_interpOpNameOffsets[op]; } +// Information about IL opcodes + +OPCODE_FORMAT const g_CEEOpArgs[] = { +#define OPDEF(c,s,pop,push,args,type,l,s1,s2,ctrl) args, +#include "opcode.def" +#undef OPDEF +}; + +struct CEEOpNameCharacters +{ +#define OPDEF(c,s,pop,push,args,type,l,s1,s2,ctrl) char c[sizeof(s)]; +#include "opcode.def" +#undef OPDEF +}; + +const struct CEEOpNameCharacters g_CEEOpNameCharacters = { +#define OPDEF(c,s,pop,push,args,type,l,s1,s2,ctrl) s, +#include "opcode.def" +#undef OPDEF +}; + +const uint32_t g_CEEOpNameOffsets[] = { +#define OPDEF(c,s,pop,push,args,type,l,s1,s2,ctrl) offsetof(CEEOpNameCharacters, c), +#include "opcode.def" +#undef OPDEF +}; + +const char* CEEOpName(OPCODE op) +{ + return ((const 
char*)&g_CEEOpNameCharacters) + g_CEEOpNameOffsets[op]; +} + +// Also updates ip to skip over prefix, if any +OPCODE CEEDecodeOpcode(const uint8_t **pIp) +{ + OPCODE res; + const uint8_t *ip = *pIp; + + if (*ip == 0xFE) + { + // Double byte encoding, offset + ip++; + res = (OPCODE)(*ip + CEE_ARGLIST); + } + else + { + res = (OPCODE)*ip; + } + *pIp = ip; + return res; +} diff --git a/src/coreclr/interpreter/intops.h b/src/coreclr/interpreter/intops.h index d058602508f50d..2661a78675ce1c 100644 --- a/src/coreclr/interpreter/intops.h +++ b/src/coreclr/interpreter/intops.h @@ -4,6 +4,7 @@ #ifndef _INTOPS_H #define _INTOPS_H +#include "openum.h" #include typedef enum @@ -25,4 +26,7 @@ extern const InterpOpNameCharacters g_interpOpNameCharacters; const char* InterpOpName(int op); +extern OPCODE_FORMAT const g_CEEOpArgs[]; +const char* CEEOpName(OPCODE op); +OPCODE CEEDecodeOpcode(const uint8_t **ip); #endif From 1cbe90a085e48376e67c8cc029bbbbff22f02c97 Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Mon, 17 Feb 2025 14:24:29 +0200 Subject: [PATCH 02/16] Implement move opcodes These will be used for arg/local loads/stores and various other move operations. 
--- src/coreclr/interpreter/compiler.cpp | 28 ++++++++++++++++++++++++++++ src/coreclr/interpreter/compiler.h | 2 ++ src/coreclr/interpreter/intops.def | 8 ++++++++ src/coreclr/interpreter/intops.h | 7 +++++++ src/coreclr/vm/interpexec.cpp | 24 ++++++++++++++++++++++++ 5 files changed, 69 insertions(+) diff --git a/src/coreclr/interpreter/compiler.cpp b/src/coreclr/interpreter/compiler.cpp index 2489753003ac3e..aa9e2f9275b896 100644 --- a/src/coreclr/interpreter/compiler.cpp +++ b/src/coreclr/interpreter/compiler.cpp @@ -337,6 +337,34 @@ void InterpCompiler::UnlinkBBs(InterpBasicBlock *from, InterpBasicBlock *to) to->inCount--; } +int32_t InterpCompiler::InterpGetMovForType(InterpType interpType, bool signExtend) +{ + switch (interpType) + { + case InterpTypeI1: + case InterpTypeU1: + case InterpTypeI2: + case InterpTypeU2: + if (signExtend) + return INTOP_MOV_I4_I1 + interpType; + else + return INTOP_MOV_4; + case InterpTypeI4: + case InterpTypeR4: + return INTOP_MOV_4; + case InterpTypeI8: + case InterpTypeR8: + return INTOP_MOV_8; + case InterpTypeO: + return INTOP_MOV_P; + case InterpTypeVT: + return INTOP_MOV_VT; + default: + assert(0); + } + return -1; +} + int32_t InterpCompiler::CreateVarExplicit(InterpType mt, CORINFO_CLASS_HANDLE clsHnd, int size) { if (m_varsSize == m_varsCapacity) { diff --git a/src/coreclr/interpreter/compiler.h b/src/coreclr/interpreter/compiler.h index fac63198fcb2d9..524a0012ee4ea1 100644 --- a/src/coreclr/interpreter/compiler.h +++ b/src/coreclr/interpreter/compiler.h @@ -144,6 +144,8 @@ class InterpCompiler COMP_HANDLE m_compHnd; CORINFO_METHOD_INFO* m_methodInfo; + static int32_t InterpGetMovForType(InterpType interpType, bool signExtend); + int GenerateCode(CORINFO_METHOD_INFO* methodInfo); void* AllocMethodData(size_t numBytes); diff --git a/src/coreclr/interpreter/intops.def b/src/coreclr/interpreter/intops.def index 78cbf4909e74f7..0a201c912e32b7 100644 --- a/src/coreclr/interpreter/intops.def +++ 
b/src/coreclr/interpreter/intops.def @@ -11,3 +11,11 @@ OPDEF(INTOP_RET, "ret", 2, 0, 1, InterpOpNoArgs) OPDEF(INTOP_RET_VOID, "ret.void", 1, 0, 0, InterpOpNoArgs) OPDEF(INTOP_LDC_I4, "ldc.i4", 3, 1, 0, InterpOpInt) + +OPDEF(INTOP_MOV_I4_I1, "mov.i4.i1", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_MOV_I4_U1, "mov.i4.u1", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_MOV_I4_I2, "mov.i4.i2", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_MOV_I4_U2, "mov.i4.u2", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_MOV_4, "mov.4", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_MOV_8, "mov.8", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_MOV_VT, "mov.vt", 4, 1, 1, InterpOpInt) diff --git a/src/coreclr/interpreter/intops.h b/src/coreclr/interpreter/intops.h index 2661a78675ce1c..3c0fb2e097c419 100644 --- a/src/coreclr/interpreter/intops.h +++ b/src/coreclr/interpreter/intops.h @@ -29,4 +29,11 @@ const char* InterpOpName(int op); extern OPCODE_FORMAT const g_CEEOpArgs[]; const char* CEEOpName(OPCODE op); OPCODE CEEDecodeOpcode(const uint8_t **ip); + +#ifdef TARGET_64BIT +#define INTOP_MOV_P INTOP_MOV_8 +#else +#define INTOP_MOV_P INTOP_MOV_4 +#endif + #endif diff --git a/src/coreclr/vm/interpexec.cpp b/src/coreclr/vm/interpexec.cpp index cb02d86f39e358..388e1d904b3d92 100644 --- a/src/coreclr/vm/interpexec.cpp +++ b/src/coreclr/vm/interpexec.cpp @@ -54,6 +54,30 @@ void InterpExecMethod(InterpMethodContextFrame *pFrame, InterpThreadContext *pTh goto EXIT_FRAME; case INTOP_RET_VOID: goto EXIT_FRAME; + +#define MOV(argtype1,argtype2) \ + LOCAL_VAR(ip [1], argtype1) = LOCAL_VAR(ip [2], argtype2); \ + ip += 3; + // When loading from a local, we might need to sign / zero extend to 4 bytes + // which is our minimum "register" size in interp. They are only needed when + // the address of the local is taken and we should try to optimize them out + // because the local can't be propagated. 
+ case INTOP_MOV_I4_I1: MOV(int32_t, int8_t); break; + case INTOP_MOV_I4_U1: MOV(int32_t, uint8_t); break; + case INTOP_MOV_I4_I2: MOV(int32_t, int16_t); break; + case INTOP_MOV_I4_U2: MOV(int32_t, uint16_t); break; + // Normal moves between vars + case INTOP_MOV_4: MOV(int32_t, int32_t); break; + case INTOP_MOV_8: MOV(int64_t, int64_t); break; + + case INTOP_MOV_VT: + memmove(stack + ip[1], stack + ip[2], ip[3]); + ip += 4; + break; + + default: + assert(0); + break; } } From f57d83a0deb99a05c4acc23d4b4471ca7e7b9a0a Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Fri, 21 Feb 2025 22:55:53 +0200 Subject: [PATCH 03/16] Implement conv opcodes With some floating point exceptions that require special handling --- src/coreclr/interpreter/compiler.cpp | 352 ++++++++++++++++++++++++++- src/coreclr/interpreter/compiler.h | 20 +- src/coreclr/interpreter/intops.def | 45 ++++ src/coreclr/vm/interpexec.cpp | 133 ++++++++++ 4 files changed, 540 insertions(+), 10 deletions(-) diff --git a/src/coreclr/interpreter/compiler.cpp b/src/coreclr/interpreter/compiler.cpp index aa9e2f9275b896..6f6cf532aca4f3 100644 --- a/src/coreclr/interpreter/compiler.cpp +++ b/src/coreclr/interpreter/compiler.cpp @@ -536,10 +536,26 @@ InterpMethod* InterpCompiler::CompileMethod() return CreateInterpMethod(); } +// Adds a conversion instruction for the value pointed to by sp, also updating the stack information +void InterpCompiler::EmitConv(StackInfo *sp, InterpInst *prevIns, StackType type, InterpOpcode convOp) +{ + InterpInst *newInst; + if (prevIns) + newInst = InsertIns(prevIns, convOp); + else + newInst = AddIns(convOp); + + newInst->SetSVar(sp->var); + sp->Init(type); + int32_t var = CreateVarExplicit(g_interpTypeFromStackType[type], NULL, INTERP_STACK_SLOT_SIZE); + sp->var = var; + newInst->SetDVar(var); +} + int InterpCompiler::GenerateCode(CORINFO_METHOD_INFO* methodInfo) { - uint8_t *ip = methodInfo->ILCode; - uint8_t *codeEnd = ip + methodInfo->ILCodeSize; + m_ip = methodInfo->ILCode; + 
uint8_t *codeEnd = m_ip + methodInfo->ILCodeSize; m_ppOffsetToBB = (InterpBasicBlock**)AllocMemPool(sizeof(InterpBasicBlock*) * (methodInfo->ILCodeSize + 1)); m_stackCapacity = methodInfo->maxStack + 1; @@ -547,13 +563,13 @@ int InterpCompiler::GenerateCode(CORINFO_METHOD_INFO* methodInfo) m_pCBB = m_pEntryBB = AllocBB(); - while (ip < codeEnd) + while (m_ip < codeEnd) { - uint8_t opcode = *ip; + uint8_t opcode = *m_ip; switch (opcode) { case CEE_NOP: - ip++; + m_ip++; break; case CEE_LDC_I4_M1: case CEE_LDC_I4_0: @@ -569,14 +585,14 @@ int InterpCompiler::GenerateCode(CORINFO_METHOD_INFO* methodInfo) m_pLastIns->data[0] = opcode - CEE_LDC_I4_0; PushType(StackTypeI4, NULL); m_pLastIns->SetDVar(m_pStackPointer[-1].var); - ip++; + m_ip++; break; case CEE_LDC_I4_S: AddIns(INTOP_LDC_I4); - m_pLastIns->data[0] = (int8_t)ip[1]; + m_pLastIns->data[0] = (int8_t)m_ip[1]; PushType(StackTypeI4, NULL); m_pLastIns->SetDVar(m_pStackPointer[-1].var); - ip += 2; + m_ip += 2; break; case CEE_RET: { @@ -597,9 +613,327 @@ int InterpCompiler::GenerateCode(CORINFO_METHOD_INFO* methodInfo) // FIXME assert(0); } - ip++; + m_ip++; break; } + case CEE_CONV_U1: + CHECK_STACK(1); + switch (m_pStackPointer[-1].type) + { + case StackTypeR4: + EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_CONV_U1_R4); + break; + case StackTypeR8: + EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_CONV_U1_R8); + break; + case StackTypeI4: + EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_CONV_U1_I4); + break; + case StackTypeI8: + EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_CONV_U1_I8); + break; + default: + assert(0); + } + m_ip++; + break; + case CEE_CONV_I1: + CHECK_STACK(1); + switch (m_pStackPointer[-1].type) + { + case StackTypeR4: + EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_CONV_I1_R4); + break; + case StackTypeR8: + EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_CONV_I1_R8); + break; + case StackTypeI4: + EmitConv(m_pStackPointer - 1, NULL, 
StackTypeI4, INTOP_CONV_I1_I4); + break; + case StackTypeI8: + EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_CONV_I1_I8); + break; + default: + assert(0); + } + m_ip++; + break; + case CEE_CONV_U2: + CHECK_STACK(1); + switch (m_pStackPointer[-1].type) + { + case StackTypeR4: + EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_CONV_U2_R4); + break; + case StackTypeR8: + EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_CONV_U2_R8); + break; + case StackTypeI4: + EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_CONV_U2_I4); + break; + case StackTypeI8: + EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_CONV_U2_I8); + break; + default: + assert(0); + } + m_ip++; + break; + case CEE_CONV_I2: + CHECK_STACK(1); + switch (m_pStackPointer[-1].type) + { + case StackTypeR4: + EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_CONV_I2_R4); + break; + case StackTypeR8: + EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_CONV_I2_R8); + break; + case StackTypeI4: + EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_CONV_I2_I4); + break; + case StackTypeI8: + EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_CONV_I2_I8); + break; + default: + assert(0); + } + m_ip++; + break; + case CEE_CONV_U: + CHECK_STACK(1); + switch (m_pStackPointer[-1].type) + { + case StackTypeR8: +#ifdef TARGET_64BIT + EmitConv(m_pStackPointer - 1, NULL, StackTypeI, INTOP_CONV_U8_R8); +#else + EmitConv(m_pStackPointer - 1, NULL, StackTypeI, INTOP_CONV_U4_R8); +#endif + break; + case StackTypeR4: +#ifdef TARGET_64BIT + EmitConv(m_pStackPointer - 1, NULL, StackTypeI, INTOP_CONV_U8_R4); +#else + EmitConv(m_pStackPointer - 1, NULL, StackTypeI, INTOP_CONV_U4_R4); +#endif + break; + case StackTypeI4: +#ifdef TARGET_64BIT + EmitConv(m_pStackPointer - 1, NULL, StackTypeI, INTOP_CONV_I8_U4); +#endif + break; + case StackTypeI8: +#ifndef TARGET_64BIT + EmitConv(m_pStackPointer - 1, NULL, StackTypeI, INTOP_MOV_8); +#endif + break; + case StackTypeMP: + case 
StackTypeO: + // Object references and managed pointers are pointer sized, so copy them with the + // pointer-sized move (INTOP_MOV_8 on 64-bit, INTOP_MOV_4 otherwise), as conv.u4/conv.i4 do. + EmitConv(m_pStackPointer - 1, NULL, StackTypeI, INTOP_MOV_P); + break; + default: + assert(0); + } + m_ip++; + break; + case CEE_CONV_I: + // conv.i: replace the top stack entry with a native-int (StackTypeI) value + CHECK_STACK(1); + switch (m_pStackPointer[-1].type) + { + case StackTypeR8: +#ifdef TARGET_64BIT + EmitConv(m_pStackPointer - 1, NULL, StackTypeI, INTOP_CONV_I8_R8); +#else + EmitConv(m_pStackPointer - 1, NULL, StackTypeI, INTOP_CONV_I4_R8); +#endif + break; + case StackTypeR4: +#ifdef TARGET_64BIT + EmitConv(m_pStackPointer - 1, NULL, StackTypeI, INTOP_CONV_I8_R4); +#else + EmitConv(m_pStackPointer - 1, NULL, StackTypeI, INTOP_CONV_I4_R4); +#endif + break; + case StackTypeI4: +#ifdef TARGET_64BIT + EmitConv(m_pStackPointer - 1, NULL, StackTypeI, INTOP_CONV_I8_I4); +#endif + break; + case StackTypeO: + case StackTypeMP: + // Pointer-sized source: INTOP_MOV_P picks the 4 or 8 byte move for the target + EmitConv(m_pStackPointer - 1, NULL, StackTypeI, INTOP_MOV_P); + break; + case StackTypeI8: +#ifndef TARGET_64BIT + EmitConv(m_pStackPointer - 1, NULL, StackTypeI, INTOP_MOV_8); +#endif + break; + default: + assert(0); + } + m_ip++; + break; + case CEE_CONV_U4: + CHECK_STACK(1); + switch (m_pStackPointer[-1].type) + { + case StackTypeR4: + EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_CONV_U4_R4); + break; + case StackTypeR8: + EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_CONV_U4_R8); + break; + case StackTypeI4: + break; + case StackTypeI8: + EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_MOV_8); + break; + case StackTypeMP: + EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_MOV_P); + break; + default: + assert(0); + } + m_ip++; + break; + case CEE_CONV_I4: + CHECK_STACK(1); + switch (m_pStackPointer[-1].type) + { + case StackTypeR4: + EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_CONV_I4_R4); + break; + case StackTypeR8: + EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_CONV_I4_R8); + break; + case StackTypeI4: + break; + case StackTypeI8: + EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_MOV_8); + break; + case StackTypeMP: + 
EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_MOV_P); + break; + default: + assert(0); + } + m_ip++; + break; + case CEE_CONV_I8: + CHECK_STACK(1); + switch (m_pStackPointer[-1].type) + { + case StackTypeR4: + EmitConv(m_pStackPointer - 1, NULL, StackTypeI8, INTOP_CONV_I8_R4); + break; + case StackTypeR8: + EmitConv(m_pStackPointer - 1, NULL, StackTypeI8, INTOP_CONV_I8_R8); + break; + case StackTypeI4: { + EmitConv(m_pStackPointer - 1, NULL, StackTypeI8, INTOP_CONV_I8_I4); + break; + } + case StackTypeI8: + break; + case StackTypeMP: +#ifdef TARGET_64BIT + EmitConv(m_pStackPointer - 1, NULL, StackTypeI8, INTOP_MOV_8); +#else + EmitConv(m_pStackPointer - 1, NULL, StackTypeI8, INTOP_CONV_I8_I4); +#endif + break; + default: + assert(0); + } + m_ip++; + break; + case CEE_CONV_R4: + CHECK_STACK(1); + switch (m_pStackPointer[-1].type) + { + case StackTypeR8: + EmitConv(m_pStackPointer - 1, NULL, StackTypeR4, INTOP_CONV_R4_R8); + break; + case StackTypeI8: + EmitConv(m_pStackPointer - 1, NULL, StackTypeR4, INTOP_CONV_R4_I8); + break; + case StackTypeI4: + EmitConv(m_pStackPointer - 1, NULL, StackTypeR4, INTOP_CONV_R4_I4); + break; + case StackTypeR4: + break; + default: + assert(0); + } + m_ip++; + break; + case CEE_CONV_R8: + CHECK_STACK(1); + switch (m_pStackPointer[-1].type) + { + case StackTypeI4: + EmitConv(m_pStackPointer - 1, NULL, StackTypeR8, INTOP_CONV_R8_I4); + break; + case StackTypeI8: + EmitConv(m_pStackPointer - 1, NULL, StackTypeR8, INTOP_CONV_R8_I8); + break; + case StackTypeR4: + EmitConv(m_pStackPointer - 1, NULL, StackTypeR8, INTOP_CONV_R8_R4); + break; + case StackTypeR8: + break; + default: + assert(0); + } + m_ip++; + break; + case CEE_CONV_U8: + CHECK_STACK(1); + switch (m_pStackPointer[-1].type) + { + case StackTypeI4: + EmitConv(m_pStackPointer - 1, NULL, StackTypeI8, INTOP_CONV_I8_U4); + break; + case StackTypeI8: + break; + case StackTypeR4: + EmitConv(m_pStackPointer - 1, NULL, StackTypeI8, INTOP_CONV_U8_R4); + break; + case 
StackTypeR8: + EmitConv(m_pStackPointer - 1, NULL, StackTypeI8, INTOP_CONV_U8_R8); + break; + case StackTypeMP: +#ifdef TARGET_64BIT + EmitConv(m_pStackPointer - 1, NULL, StackTypeI8, INTOP_MOV_8); +#else + EmitConv(m_pStackPointer - 1, NULL, StackTypeI8, INTOP_CONV_I8_U4); +#endif + break; + default: + assert(0); + } + m_ip++; + break; + case CEE_CONV_R_UN: + // Same CHECK_STACK(1) guard as every other conv case, before m_pStackPointer[-1] is read + CHECK_STACK(1); + switch (m_pStackPointer[-1].type) + { + case StackTypeR4: + EmitConv(m_pStackPointer - 1, NULL, StackTypeR8, INTOP_CONV_R8_R4); + break; + case StackTypeR8: + break; + case StackTypeI8: + EmitConv(m_pStackPointer - 1, NULL, StackTypeR8, INTOP_CONV_R_UN_I8); + break; + case StackTypeI4: + EmitConv(m_pStackPointer - 1, NULL, StackTypeR8, INTOP_CONV_R_UN_I4); + break; + default: + assert(0); + } + m_ip++; + break; default: assert(0); break; diff --git a/src/coreclr/interpreter/compiler.h b/src/coreclr/interpreter/compiler.h index 524a0012ee4ea1..9a1a45345c50b3 100644 --- a/src/coreclr/interpreter/compiler.h +++ b/src/coreclr/interpreter/compiler.h @@ -16,7 +16,12 @@ enum StackType { StackTypeO, StackTypeVT, StackTypeMP, - StackTypeF + StackTypeF, +#ifdef TARGET_64BIT + StackTypeI = StackTypeI8 +#else + StackTypeI = StackTypeI4 +#endif }; // Types relevant for interpreter vars and opcodes. They are used in the final @@ -133,6 +138,14 @@ struct StackInfo // The var associated with the value of this stack entry. Every time we push on // the stack a new var is created. 
int var; + + void Init(StackType type) + { + this->type = type; + clsHnd = NULL; + size = 0; + var = -1; + } }; typedef class ICorJitInfo* COMP_HANDLE; @@ -146,6 +159,8 @@ class InterpCompiler static int32_t InterpGetMovForType(InterpType interpType, bool signExtend); + uint8_t* m_ip; + int GenerateCode(CORINFO_METHOD_INFO* methodInfo); void* AllocMethodData(size_t numBytes); @@ -206,6 +221,9 @@ class InterpCompiler void PushType(StackType stackType, CORINFO_CLASS_HANDLE clsHnd); void PushTypeVT(CORINFO_CLASS_HANDLE clsHnd, int size); + // Code emit + void EmitConv(StackInfo *sp, InterpInst *prevIns, StackType type, InterpOpcode convOp); + // Passes int32_t* m_pMethodCode; int32_t m_MethodCodeSize; // in int32_t diff --git a/src/coreclr/interpreter/intops.def b/src/coreclr/interpreter/intops.def index 0a201c912e32b7..293fa5282f99b8 100644 --- a/src/coreclr/interpreter/intops.def +++ b/src/coreclr/interpreter/intops.def @@ -19,3 +19,48 @@ OPDEF(INTOP_MOV_I4_U2, "mov.i4.u2", 3, 1, 1, InterpOpNoArgs) OPDEF(INTOP_MOV_4, "mov.4", 3, 1, 1, InterpOpNoArgs) OPDEF(INTOP_MOV_8, "mov.8", 3, 1, 1, InterpOpNoArgs) OPDEF(INTOP_MOV_VT, "mov.vt", 4, 1, 1, InterpOpInt) + +OPDEF(INTOP_CONV_R_UN_I4, "conv.r.un.i4", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_CONV_R_UN_I8, "conv.r.un.i8", 3, 1, 1, InterpOpNoArgs) + +OPDEF(INTOP_CONV_I1_I4, "conv.i1.i4", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_CONV_I1_I8, "conv.i1.i8", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_CONV_I1_R4, "conv.i1.r4", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_CONV_I1_R8, "conv.i1.r8", 3, 1, 1, InterpOpNoArgs) + +OPDEF(INTOP_CONV_U1_I4, "conv.u1.i4", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_CONV_U1_I8, "conv.u1.i8", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_CONV_U1_R4, "conv.u1.r4", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_CONV_U1_R8, "conv.u1.r8", 3, 1, 1, InterpOpNoArgs) + +OPDEF(INTOP_CONV_I2_I4, "conv.i2.i4", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_CONV_I2_I8, "conv.i2.i8", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_CONV_I2_R4, "conv.i2.r4", 3, 1, 
1, InterpOpNoArgs) +OPDEF(INTOP_CONV_I2_R8, "conv.i2.r8", 3, 1, 1, InterpOpNoArgs) + +OPDEF(INTOP_CONV_U2_I4, "conv.u2.i4", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_CONV_U2_I8, "conv.u2.i8", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_CONV_U2_R4, "conv.u2.r4", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_CONV_U2_R8, "conv.u2.r8", 3, 1, 1, InterpOpNoArgs) + +OPDEF(INTOP_CONV_I4_R4, "conv.i4.r4", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_CONV_I4_R8, "conv.i4.r8", 3, 1, 1, InterpOpNoArgs) + +OPDEF(INTOP_CONV_U4_R4, "conv.u4.r4", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_CONV_U4_R8, "conv.u4.r8", 3, 1, 1, InterpOpNoArgs) + +OPDEF(INTOP_CONV_I8_I4, "conv.i8.i4", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_CONV_I8_U4, "conv.i8.u4", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_CONV_I8_R4, "conv.i8.r4", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_CONV_I8_R8, "conv.i8.r8", 3, 1, 1, InterpOpNoArgs) + +OPDEF(INTOP_CONV_R4_I4, "conv.r4.i4", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_CONV_R4_I8, "conv.r4.i8", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_CONV_R4_R8, "conv.r4.r8", 3, 1, 1, InterpOpNoArgs) + +OPDEF(INTOP_CONV_R8_I4, "conv.r8.i4", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_CONV_R8_I8, "conv.r8.i8", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_CONV_R8_R4, "conv.r8.r4", 3, 1, 1, InterpOpNoArgs) + +OPDEF(INTOP_CONV_U8_R4, "conv.u8.r4", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_CONV_U8_R8, "conv.u8.r8", 3, 1, 1, InterpOpNoArgs) diff --git a/src/coreclr/vm/interpexec.cpp b/src/coreclr/vm/interpexec.cpp index 388e1d904b3d92..b47e28757af767 100644 --- a/src/coreclr/vm/interpexec.cpp +++ b/src/coreclr/vm/interpexec.cpp @@ -75,6 +75,139 @@ void InterpExecMethod(InterpMethodContextFrame *pFrame, InterpThreadContext *pTh ip += 4; break; + case INTOP_CONV_R_UN_I4: + LOCAL_VAR(ip[1], double) = (double)LOCAL_VAR(ip[2], uint32_t); + ip += 3; + break; + case INTOP_CONV_R_UN_I8: + LOCAL_VAR(ip[1], double) = (double)LOCAL_VAR(ip[2], uint64_t); + ip += 3; + break; + case INTOP_CONV_I1_I4: + LOCAL_VAR(ip[1], int32_t) = (int8_t)LOCAL_VAR(ip[2], int32_t); 
+ ip += 3; + break; + case INTOP_CONV_I1_I8: + LOCAL_VAR(ip[1], int32_t) = (int8_t)LOCAL_VAR(ip[2], int64_t); + ip += 3; + break; + case INTOP_CONV_I1_R4: + LOCAL_VAR(ip[1], int32_t) = (int8_t)(int32_t)LOCAL_VAR(ip[2], float); + ip += 3; + break; + case INTOP_CONV_I1_R8: + LOCAL_VAR(ip[1], int32_t) = (int8_t)(int32_t)LOCAL_VAR(ip[2], double); + ip += 3; + break; + case INTOP_CONV_U1_I4: + LOCAL_VAR(ip[1], int32_t) = (uint8_t)LOCAL_VAR(ip[2], int32_t); + ip += 3; + break; + case INTOP_CONV_U1_I8: + LOCAL_VAR(ip[1], int32_t) = (uint8_t)LOCAL_VAR(ip[2], int64_t); + ip += 3; + break; + case INTOP_CONV_U1_R4: + LOCAL_VAR(ip[1], int32_t) = (uint8_t)(uint32_t)LOCAL_VAR(ip[2], float); + ip += 3; + break; + case INTOP_CONV_U1_R8: + LOCAL_VAR(ip[1], int32_t) = (uint8_t)(uint32_t)LOCAL_VAR(ip[2], double); + ip += 3; + break; + case INTOP_CONV_I2_I4: + LOCAL_VAR(ip[1], int32_t) = (int16_t)LOCAL_VAR(ip[2], int32_t); + ip += 3; + break; + case INTOP_CONV_I2_I8: + LOCAL_VAR(ip[1], int32_t) = (int16_t)LOCAL_VAR(ip[2], int64_t); + ip += 3; + break; + case INTOP_CONV_I2_R4: + LOCAL_VAR(ip[1], int32_t) = (int16_t)(int32_t)LOCAL_VAR(ip[2], float); + ip += 3; + break; + case INTOP_CONV_I2_R8: + LOCAL_VAR(ip[1], int32_t) = (int16_t)(int32_t)LOCAL_VAR(ip[2], double); + ip += 3; + break; + case INTOP_CONV_U2_I4: + LOCAL_VAR(ip[1], int32_t) = (uint16_t)LOCAL_VAR(ip[2], int32_t); + ip += 3; + break; + case INTOP_CONV_U2_I8: + LOCAL_VAR(ip[1], int32_t) = (uint16_t)LOCAL_VAR(ip[2], int64_t); + ip += 3; + break; + case INTOP_CONV_U2_R4: + LOCAL_VAR(ip[1], int32_t) = (uint16_t)(uint32_t)LOCAL_VAR(ip[2], float); + ip += 3; + break; + case INTOP_CONV_U2_R8: + LOCAL_VAR(ip[1], int32_t) = (uint16_t)(uint32_t)LOCAL_VAR(ip[2], double); + ip += 3; + break; + case INTOP_CONV_I4_R4: + LOCAL_VAR(ip[1], int32_t) = (int32_t)LOCAL_VAR(ip[2], float); + ip += 3; + break;; + case INTOP_CONV_I4_R8: + LOCAL_VAR(ip[1], int32_t) = (int32_t)LOCAL_VAR(ip[2], double); + ip += 3; + break;; + + case INTOP_CONV_U4_R4: + 
case INTOP_CONV_U4_R8: + assert(0); + break; + + case INTOP_CONV_I8_I4: + LOCAL_VAR(ip[1], int64_t) = LOCAL_VAR(ip[2], int32_t); + ip += 3; + break; + case INTOP_CONV_I8_U4: + LOCAL_VAR(ip[1], int64_t) = (uint32_t)LOCAL_VAR (ip[2], int32_t); + ip += 3; + break;; + case INTOP_CONV_I8_R4: + LOCAL_VAR(ip[1], int64_t) = (int64_t)LOCAL_VAR(ip[2], float); + ip += 3; + break; + case INTOP_CONV_I8_R8: + LOCAL_VAR(ip[1], int64_t) = (int64_t)LOCAL_VAR(ip[2], double); + ip += 3; + break; + case INTOP_CONV_R4_I4: + LOCAL_VAR(ip[1], float) = (float)LOCAL_VAR(ip[2], int32_t); + ip += 3; + break;; + case INTOP_CONV_R4_I8: + LOCAL_VAR(ip[1], float) = (float)LOCAL_VAR(ip[2], int64_t); + ip += 3; + break; + case INTOP_CONV_R4_R8: + LOCAL_VAR(ip[1], float) = (float)LOCAL_VAR(ip[2], double); + ip += 3; + break; + case INTOP_CONV_R8_I4: + LOCAL_VAR(ip[1], double) = (double)LOCAL_VAR(ip[2], int32_t); + ip += 3; + break; + case INTOP_CONV_R8_I8: + LOCAL_VAR(ip[1], double) = (double)LOCAL_VAR(ip[2], int64_t); + ip += 3; + break; + case INTOP_CONV_R8_R4: + LOCAL_VAR(ip[1], double) = (double)LOCAL_VAR(ip[2], float); + ip += 3; + break; + + case INTOP_CONV_U8_R4: + case INTOP_CONV_U8_R8: + // TODO + assert(0); + break; + default: assert(0); break; From c954f91e9338edcd0cc38407f730d56d4110b783 Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Sun, 16 Feb 2025 13:19:30 +0200 Subject: [PATCH 04/16] Traverse IL and build the initial set of basic blocks This is done as an initial iteration over the IL code, with the main objective of creating the initial set of basic blocks and mapping from il offset to basic block. In future changes, when we compile the code, these basic blocks will be linked together, according to the control flow resulting from IL instructions. 
--- src/coreclr/interpreter/compiler.cpp | 131 ++++++++++++++++++++++++++- src/coreclr/interpreter/compiler.h | 2 + src/coreclr/interpreter/intops.h | 43 +++++++++ 3 files changed, 175 insertions(+), 1 deletion(-) diff --git a/src/coreclr/interpreter/compiler.cpp b/src/coreclr/interpreter/compiler.cpp index 6f6cf532aca4f3..c713e6ccc12213 100644 --- a/src/coreclr/interpreter/compiler.cpp +++ b/src/coreclr/interpreter/compiler.cpp @@ -43,6 +43,13 @@ void* InterpCompiler::AllocMemPool(size_t numBytes) return malloc(numBytes); } +void* InterpCompiler::AllocMemPool0(size_t numBytes) +{ + void *ptr = AllocMemPool(numBytes); + memset(ptr, 0, numBytes); + return ptr; +} + // Allocator for potentially larger chunks of data, that we might want to free // eagerly, before method is finished compiling, to prevent excessive memory usage. void* InterpCompiler::AllocTemporary(size_t numBytes) @@ -552,16 +559,138 @@ void InterpCompiler::EmitConv(StackInfo *sp, InterpInst *prevIns, StackType type newInst->SetDVar(var); } +// Scans the IL once and calls GetBB for every offset at which a basic block starts: offset 0, +// the start of each EH try/handler/filter region, every branch and switch target, and the +// instruction following a branch, switch, throw, rethrow or endfinally. +// Returns false when an EH region or a branch/switch target lies outside the method body. +bool InterpCompiler::CreateBasicBlocks(CORINFO_METHOD_INFO* methodInfo) +{ + int32_t codeSize = methodInfo->ILCodeSize; + uint8_t *codeStart = methodInfo->ILCode; + uint8_t *codeEnd = codeStart + codeSize; + const uint8_t *ip = codeStart; + + // One zero-initialized slot per IL offset, plus one for the offset just past the last instruction + m_ppOffsetToBB = (InterpBasicBlock**)AllocMemPool0(sizeof(InterpBasicBlock*) * (methodInfo->ILCodeSize + 1)); + GetBB(0); + + for (unsigned int i = 0; i < methodInfo->EHcount; i++) + { + CORINFO_EH_CLAUSE clause; + m_compHnd->getEHinfo(methodInfo->ftn, i, &clause); + + if ((codeStart + clause.TryOffset) > codeEnd || + (codeStart + clause.TryOffset + clause.TryLength) > codeEnd) + { + return false; + } + GetBB(clause.TryOffset); + + if ((codeStart + clause.HandlerOffset) > codeEnd || + (codeStart + clause.HandlerOffset + clause.HandlerLength) > codeEnd) + { + return false; + } + GetBB(clause.HandlerOffset); + + // Flags is a bit mask, so test the filter bit instead of comparing the whole value + if (clause.Flags & CORINFO_EH_CLAUSE_FILTER) + { + if ((codeStart + clause.FilterOffset) > codeEnd) + return false; + 
GetBB(clause.FilterOffset); + } + } + + while (ip < codeEnd) + { + int32_t insOffset = (int32_t)(ip - codeStart); + OPCODE opcode = CEEDecodeOpcode(&ip); + OPCODE_FORMAT opArgs = g_CEEOpArgs[opcode]; + // 64-bit so that insOffset plus an arbitrary 32-bit displacement cannot overflow + int64_t target; + + switch (opArgs) + { + case InlineNone: + ip++; + break; + case InlineString: + case InlineType: + case InlineField: + case InlineMethod: + case InlineTok: + case InlineSig: + case ShortInlineR: + case InlineI: + ip += 5; + break; + case InlineVar: + ip += 3; + break; + case ShortInlineVar: + case ShortInlineI: + ip += 2; + break; + case ShortInlineBrTarget: + // The 1-byte displacement must itself be inside the IL + if (codeEnd - ip < 2) + return false; + target = insOffset + 2 + (int8_t)ip [1]; + // A backward displacement can point before the start of the method + if (target < 0 || target >= codeSize) + return false; + GetBB((int32_t)target); + ip += 2; + GetBB((int32_t)(ip - codeStart)); + break; + case InlineBrTarget: + // The 4-byte displacement must itself be inside the IL + if (codeEnd - ip < 5) + return false; + target = (int64_t)insOffset + 5 + getI4LittleEndian(ip + 1); + if (target < 0 || target >= codeSize) + return false; + GetBB((int32_t)target); + ip += 5; + GetBB((int32_t)(ip - codeStart)); + break; + case InlineSwitch: { + // The 4-byte case count must itself be inside the IL + if (codeEnd - ip < 5) + return false; + uint32_t n = getI4LittleEndian(ip + 1); + ip += 5; + // Reject a jump table that would run past the end of the IL before reading it; + // this also keeps 5 + 4 * n from wrapping. + if (n > (uint32_t)(codeEnd - ip) / 4) + return false; + // Switch displacements are relative to the instruction following the jump table + insOffset += 5 + 4 * n; + target = insOffset; + if (target >= codeSize) + return false; + GetBB((int32_t)target); + for (uint32_t i = 0; i < n; i++) + { + target = (int64_t)insOffset + getI4LittleEndian(ip); + if (target < 0 || target >= codeSize) + return false; + GetBB((int32_t)target); + ip += 4; + } + GetBB((int32_t)(ip - codeStart)); + break; + } + case InlineR: + case InlineI8: + ip += 9; + break; + default: + assert(0); + } + // These opcodes never fall through, so whatever follows them starts a new block + if (opcode == CEE_THROW || opcode == CEE_ENDFINALLY || opcode == CEE_RETHROW) + GetBB((int32_t)(ip - codeStart)); + } + + return true; +} + int InterpCompiler::GenerateCode(CORINFO_METHOD_INFO* methodInfo) { m_ip = methodInfo->ILCode; uint8_t *codeEnd = m_ip + methodInfo->ILCodeSize; - m_ppOffsetToBB = (InterpBasicBlock**)AllocMemPool(sizeof(InterpBasicBlock*) * (methodInfo->ILCodeSize + 1)); m_stackCapacity = methodInfo->maxStack + 1; m_pStackBase = m_pStackPointer = (StackInfo*)AllocTemporary(sizeof(StackInfo) * m_stackCapacity); m_pCBB = m_pEntryBB = AllocBB(); + m_pEntryBB->ilOffset 
= 0; + + if (!CreateBasicBlocks(methodInfo)) + { + // FIXME error return for compilation failure + m_hasInvalidCode = true; + goto exit; + } while (m_ip < codeEnd) { diff --git a/src/coreclr/interpreter/compiler.h b/src/coreclr/interpreter/compiler.h index 9a1a45345c50b3..092dd1831096d3 100644 --- a/src/coreclr/interpreter/compiler.h +++ b/src/coreclr/interpreter/compiler.h @@ -165,6 +165,7 @@ class InterpCompiler void* AllocMethodData(size_t numBytes); void* AllocMemPool(size_t numBytes); + void* AllocMemPool0(size_t numBytes); void* AllocTemporary(size_t numBytes); void* ReallocTemporary(void* ptr, size_t numBytes); void FreeTemporary(void* ptr); @@ -233,6 +234,7 @@ class InterpCompiler void EmitCode(); int32_t* EmitCodeIns(int32_t *ip, InterpInst *pIns); InterpMethod* CreateInterpMethod(); + bool CreateBasicBlocks(CORINFO_METHOD_INFO* methodInfo); public: InterpCompiler(COMP_HANDLE compHnd, CORINFO_METHOD_INFO* methodInfo); diff --git a/src/coreclr/interpreter/intops.h b/src/coreclr/interpreter/intops.h index 3c0fb2e097c419..3668e9a1eeb4fc 100644 --- a/src/coreclr/interpreter/intops.h +++ b/src/coreclr/interpreter/intops.h @@ -36,4 +36,47 @@ OPCODE CEEDecodeOpcode(const uint8_t **ip); #define INTOP_MOV_P INTOP_MOV_4 #endif +// Helpers identical to ones used by JIT +// FIXME how to consume GET_UNALIGNED_VAL defines from pal as jit ??? 
+// +//#include "pal_mstypes.h" +//#include "pal_endian.h" + +// The integer readers below assemble the value one byte at a time, least significant byte +// first, so they accept unaligned pointers and return the same value on any host byte order. + +inline uint16_t getU2LittleEndian(const uint8_t* ptr) +{ + return (uint16_t)(ptr[0] | (ptr[1] << 8)); +} + +inline uint32_t getU4LittleEndian(const uint8_t* ptr) +{ + return (uint32_t)ptr[0] | ((uint32_t)ptr[1] << 8) | ((uint32_t)ptr[2] << 16) | ((uint32_t)ptr[3] << 24); +} + +inline int16_t getI2LittleEndian(const uint8_t* ptr) +{ + return (int16_t)getU2LittleEndian(ptr); +} + +inline int32_t getI4LittleEndian(const uint8_t* ptr) +{ + return (int32_t)getU4LittleEndian(ptr); +} + +inline int64_t getI8LittleEndian(const uint8_t* ptr) +{ + // Low 32 bits come first in the stream, high 32 bits follow + return (int64_t)((uint64_t)getU4LittleEndian(ptr) | ((uint64_t)getU4LittleEndian(ptr + 4) << 32)); +} + +// Reads the 4 byte pattern as an integer and reinterprets it as a float +inline float getR4LittleEndian(const uint8_t* ptr) +{ + int32_t val = getI4LittleEndian(ptr); + return *(float*)&val; +} + +// Reads the 8 byte pattern as an integer and reinterprets it as a double +inline double getR8LittleEndian(const uint8_t* ptr) +{ + int64_t val = getI8LittleEndian(ptr); + return *(double*)&val; +} + #endif From d16fb1dadd1947af1aff0a949764c7a21921c528 Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Mon, 17 Feb 2025 18:38:58 +0200 Subject: [PATCH 05/16] Add simple array data structure The interpreter will require additional data structures in the future: linked list, hashtable, bitset. --- src/coreclr/interpreter/datastructs.h | 57 +++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 src/coreclr/interpreter/datastructs.h diff --git a/src/coreclr/interpreter/datastructs.h b/src/coreclr/interpreter/datastructs.h new file mode 100644 index 00000000000000..9d18997e792ea2 --- /dev/null +++ b/src/coreclr/interpreter/datastructs.h @@ -0,0 +1,57 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+
+#ifndef _DATASTRUCTS_H_
+#define _DATASTRUCTS_H_
+
+// Simple growable array. Elements are moved with realloc and stored by plain assignment,
+// so T must be trivially copyable (the class is meant for pointer elements).
+template <typename T>
+class PtrArray
+{
+private:
+    int32_t m_size, m_capacity;
+    T *m_array;
+
+    // Doubles the capacity (16 slots on first use) and reallocates the storage.
+    // NOTE(review): a failed realloc is not handled here - confirm the intended OOM policy.
+    void Grow()
+    {
+        if (m_capacity)
+            m_capacity *= 2;
+        else
+            m_capacity = 16;
+
+        m_array = (T*)realloc(m_array, m_capacity * sizeof(T));
+    }
+public:
+    PtrArray()
+    {
+        m_size = 0;
+        m_capacity = 0;
+        m_array = NULL;
+    }
+
+    // The array owns its buffer; a member-wise copy would free the same buffer twice.
+    PtrArray(const PtrArray&) = delete;
+    PtrArray& operator=(const PtrArray&) = delete;
+
+    ~PtrArray()
+    {
+        if (m_capacity > 0)
+            free(m_array);
+    }
+
+    // Number of elements added so far.
+    int32_t GetSize() const
+    {
+        return m_size;
+    }
+
+    // Appends element, growing the storage when it is full.
+    void Add(T element)
+    {
+        if (m_size == m_capacity)
+            Grow();
+        m_array[m_size] = element;
+        m_size++;
+    }
+
+    // Returns the element at index; index must be in [0, GetSize()).
+    T Get(int32_t index) const
+    {
+        assert(index >= 0 && index < m_size);
+        return m_array[index];
+    }
+};
+
+#endif
From e4b69f3b041704b4b1728b2f55981ec12a5dd6d1 Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Mon, 17 Feb 2025 23:12:11 +0200 Subject: [PATCH 06/16] Finish basic block support and implement branches From the initial set of basic blocks we will start generating code for the current method. Each basic block has a stack state, representing the state of the IL stack at the moment when we enter. Codegen starts from a dummy entry bblock which has an empty stack state. As we iterate over instructions, we check if a new bblock starts at this offset (this is determined based on the m_ppOffsetToBB mapping created in a previous commit). If we are starting generating code inside a new bblock, various things need to be done. We need to check if the basic block is a fall through from the previous bblock (in which case we initialize the stack state of this new bblock). Also if the bblock is not fall through, we can only generate code into it if the stack state is initialized. Otherwise, we will skip through its instructions and, once the traverse the entire IL, we will do a reiteration generating code only in the bblocks that haven't yet been emitted. When emitting the final code, one basic block at a time, we update the real native offset of each basic block.
When we need to emit a branch, if we already know the offset of the target bblock, we emit it directly in the code. Otherwise, we add a relocation record in an array. Once we finish generating code for all basic blocks, we traverse the relocation array and patch all recorded instruction slots with the now-known branch offset. --- src/coreclr/interpreter/compiler.cpp | 586 +++++++++++++++++++- src/coreclr/interpreter/compiler.h | 46 +- src/coreclr/interpreter/interpretershared.h | 10 +- src/coreclr/interpreter/intops.cpp | 52 ++ src/coreclr/interpreter/intops.def | 51 ++ src/coreclr/interpreter/intops.h | 15 + src/coreclr/interpreter/intopsshared.h | 15 + src/coreclr/vm/interpexec.cpp | 152 ++++- 8 files changed, 888 insertions(+), 39 deletions(-) create mode 100644 src/coreclr/interpreter/intopsshared.h diff --git a/src/coreclr/interpreter/compiler.cpp b/src/coreclr/interpreter/compiler.cpp index c713e6ccc12213..1a80a235a09945 100644 --- a/src/coreclr/interpreter/compiler.cpp +++ b/src/coreclr/interpreter/compiler.cpp @@ -177,7 +177,14 @@ bool InterpCompiler::InsIsNop(InterpInst *ins) int32_t InterpCompiler::GetInsLength(InterpInst *ins) { - return g_interpOpLen[ins->opcode]; + int len = g_interpOpLen[ins->opcode]; + if (len == 0) + { + assert(ins->opcode == INTOP_SWITCH); + len = 3 + ins->data[0]; + } + + return len; } void InterpCompiler::ForEachInsSVar(InterpInst *ins, void *pData, void (InterpCompiler::*callback)(int*, void*)) @@ -372,7 +379,73 @@ int32_t InterpCompiler::InterpGetMovForType(InterpType interpType, bool signExte return -1; } -int32_t InterpCompiler::CreateVarExplicit(InterpType mt, CORINFO_CLASS_HANDLE clsHnd, int size) +// This method needs to be called when the current basic block ends and execution can +// continue into pTargetBB. When the stack state of a basic block is initialized, the vars +// associated with the stack state are set.
When another bblock will continue execution +// into this bblock, it will first have to emit moves from the vars in its stack state +// to the vars of the target bblock stack state. +void InterpCompiler::EmitBBEndVarMoves(InterpBasicBlock *pTargetBB) +{ + if (pTargetBB->stackHeight <= 0) + return; + + for (int i = 0; i < pTargetBB->stackHeight; i++) + { + int sVar = m_pStackPointer[i].var; + int dVar = pTargetBB->pStackState[i].var; + if (sVar != dVar) + { + InterpType interpType = m_pVars[sVar].interpType; + int32_t movOp = InterpGetMovForType(interpType, false); + + AddIns(movOp); + m_pLastIns->SetSVar(m_pStackPointer[i].var); + m_pLastIns->SetDVar(pTargetBB->pStackState[i].var); + + if (interpType == InterpTypeVT) + { + assert(m_pVars[sVar].size == m_pVars[dVar].size); + m_pLastIns->data[0] = m_pVars[sVar].size; + } + } + } +} + +static void MergeStackTypeInfo(StackInfo *pState1, StackInfo *pState2, int len) +{ + // Discard type information if we have type conflicts for stack contents + for (int i = 0; i < len; i++) + { + if (pState1[i].clsHnd != pState2[i].clsHnd) + { + pState1[i].clsHnd = NULL; + pState2[i].clsHnd = NULL; + } + } +} + +// Initializes stack state at entry to bb, based on the current stack state +void InterpCompiler::InitBBStackState(InterpBasicBlock *pBB) +{ + if (pBB->stackHeight >= 0) + { + // Already initialized, update stack information + MergeStackTypeInfo(m_pStackBase, pBB->pStackState, pBB->stackHeight); + } + else + { + pBB->stackHeight = (int32_t)(m_pStackPointer - m_pStackBase); + if (pBB->stackHeight > 0) + { + int size = pBB->stackHeight * sizeof (StackInfo); + pBB->pStackState = (StackInfo*)AllocMemPool(size); + memcpy (pBB->pStackState, m_pStackBase, size); + } + } +} + + +int32_t InterpCompiler::CreateVarExplicit(InterpType interpType, CORINFO_CLASS_HANDLE clsHnd, int size) { if (m_varsSize == m_varsCapacity) { m_varsCapacity *= 2; @@ -382,7 +455,7 @@ int32_t InterpCompiler::CreateVarExplicit(InterpType mt, CORINFO_CLASS_HANDLE 
cl } InterpVar *var = &m_pVars[m_varsSize]; - var->mt = mt; + var->interpType = interpType; var->clsHnd = clsHnd; var->size = size; var->indirects = 0; @@ -408,7 +481,19 @@ void InterpCompiler::EnsureStack(int additional) do \ { \ if (!CheckStackHelper (n)) \ - goto exit; \ + goto exit_bad_code; \ + } while (0) + +#define CHECK_STACK_RET_VOID(n) \ + do { \ + if (!CheckStackHelper(n)) \ + return; \ + } while (0) + +#define CHECK_STACK_RET(n, ret) \ + do { \ + if (!CheckStackHelper(n)) \ + return ret; \ } while (0) bool InterpCompiler::CheckStackHelper(int n) @@ -459,42 +544,110 @@ int32_t InterpCompiler::ComputeCodeSize() return codeSize; } -int32_t* InterpCompiler::EmitCodeIns(int32_t *ip, InterpInst *ins) +int32_t* InterpCompiler::EmitCodeIns(int32_t *ip, InterpInst *ins, PtrArray *relocs) { int32_t opcode = ins->opcode; int32_t *startIp = ip; *ip++ = opcode; - if (g_interpOpDVars[opcode]) - *ip++ = m_pVars[ins->dVar].offset; - - if (g_interpOpSVars[opcode]) + if (opcode == INTOP_SWITCH) + { + int32_t numLabels = ins->data [0]; + *ip++ = m_pVars[ins->sVars[0]].offset; + *ip++ = numLabels; + // Add relocation for each label + for (int32_t i = 0; i < numLabels; i++) + { + Reloc *reloc = (Reloc*)AllocMemPool(sizeof(Reloc)); + reloc->type = RelocSwitch; + reloc->offset = (int32_t)(ip - m_pMethodCode); + reloc->pTargetBB = ins->info.ppTargetBBTable [i]; + relocs->Add(reloc); + *ip++ = (int32_t)0xdeadbeef; + } + } + else if (InterpOpIsUncondBranch(opcode) || InterpOpIsCondBranch(opcode)) { + int32_t brBaseOffset = (int32_t)(startIp - m_pMethodCode); for (int i = 0; i < g_interpOpSVars[opcode]; i++) + *ip++ = m_pVars[ins->sVars[i]].offset; + + if (ins->info.pTargetBB->nativeOffset >= 0) { - if (ins->sVars[i] == CALL_ARGS_SVAR) - { - *ip++ = m_paramAreaOffset + ins->info.pCallInfo->callOffset; - } - else + *ip++ = ins->info.pTargetBB->nativeOffset - brBaseOffset; + } + else if (opcode == INTOP_BR && ins->info.pTargetBB == m_pCBB->pNextBB) + { + // Ignore branch to the 
next basic block. Revert the added INTOP_BR. + ip--; + } + else + { + // We don't know yet the IR offset of the target, add a reloc instead + Reloc *reloc = (Reloc*)AllocMemPool(sizeof(Reloc)); + reloc->type = RelocLongBranch; + reloc->skip = g_interpOpSVars[opcode]; + reloc->offset = brBaseOffset; + reloc->pTargetBB = ins->info.pTargetBB; + relocs->Add(reloc); + *ip++ = (int32_t)0xdeadbeef; + } + } + else + { + if (g_interpOpDVars[opcode]) + *ip++ = m_pVars[ins->dVar].offset; + + if (g_interpOpSVars[opcode]) + { + for (int i = 0; i < g_interpOpSVars[opcode]; i++) { - *ip++ = m_pVars[ins->sVars[i]].offset; + if (ins->sVars[i] == CALL_ARGS_SVAR) + { + *ip++ = m_paramAreaOffset + ins->info.pCallInfo->callOffset; + } + else + { + *ip++ = m_pVars[ins->sVars[i]].offset; + } } } - } - int left = GetInsLength(ins) - (int32_t)(ip - startIp); - // Emit the rest of the data - for (int i = 0; i < left; i++) - *ip++ = ins->data[i]; + int left = GetInsLength(ins) - (int32_t)(ip - startIp); + // Emit the rest of the data + for (int i = 0; i < left; i++) + *ip++ = ins->data[i]; + } return ip; } +void InterpCompiler::PatchRelocations(PtrArray *relocs) +{ + int32_t size = relocs->GetSize(); + + for (int32_t i = 0; i < size; i++) + { + Reloc *reloc = relocs->Get(i); + int32_t offset = reloc->pTargetBB->nativeOffset - reloc->offset; + int32_t *pSlot = NULL; + + if (reloc->type == RelocLongBranch) + pSlot = m_pMethodCode + reloc->offset + reloc->skip + 1; + else if (reloc->type == RelocSwitch) + pSlot = m_pMethodCode + reloc->offset; + else + assert(0); + + assert(*pSlot == (int32_t)0xdeadbeef); + *pSlot = offset; + } +} void InterpCompiler::EmitCode() { + PtrArray relocs; int32_t codeSize = ComputeCodeSize(); m_pMethodCode = (int32_t*)AllocMethodData(codeSize * sizeof(int32_t)); @@ -502,13 +655,16 @@ void InterpCompiler::EmitCode() for (InterpBasicBlock *bb = m_pEntryBB; bb != NULL; bb = bb->pNextBB) { bb->nativeOffset = (int32_t)(ip - m_pMethodCode); + m_pCBB = bb; for (InterpInst 
*ins = bb->pFirstIns; ins != NULL; ins = ins->pNext) { - ip = EmitCodeIns(ip, ins); + ip = EmitCodeIns(ip, ins, &relocs); } } m_MethodCodeSize = (int32_t)(ip - m_pMethodCode); + + PatchRelocations(&relocs); } InterpMethod* InterpCompiler::CreateInterpMethod() @@ -674,26 +830,216 @@ bool InterpCompiler::CreateBasicBlocks(CORINFO_METHOD_INFO* methodInfo) return true; } +// ilOffset represents relative branch offset +void InterpCompiler::EmitBranch(InterpOpcode opcode, int32_t ilOffset) +{ + int32_t target = (int32_t)(m_ip - m_pILCode) + ilOffset; + if (target < 0 || target >= m_ILCodeSize) + assert(0); + + InterpBasicBlock *pTargetBB = m_ppOffsetToBB[target]; + assert(pTargetBB != NULL); + + EmitBBEndVarMoves(pTargetBB); + InitBBStackState(pTargetBB); + + AddIns(opcode); + m_pLastIns->info.pTargetBB = pTargetBB; +} + +void InterpCompiler::EmitOneArgBranch(InterpOpcode opcode, int32_t ilOffset, int insSize) +{ + CHECK_STACK_RET_VOID(1); + StackType argType = (m_pStackPointer[-1].type == StackTypeO || m_pStackPointer[-1].type == StackTypeMP) ? StackTypeI : m_pStackPointer[-1].type; + // offset the opcode to obtain the type specific I4/I8/R4/R8 variant. + InterpOpcode opcodeArgType = (InterpOpcode)(opcode + argType - StackTypeI4); + m_pStackPointer--; + if (ilOffset) + { + EmitBranch(opcodeArgType, ilOffset + insSize); + m_pLastIns->SetSVar(m_pStackPointer[0].var); + } + else + { + AddIns(INTOP_NOP); + } +} + +void InterpCompiler::EmitTwoArgBranch(InterpOpcode opcode, int32_t ilOffset, int insSize) +{ + CHECK_STACK_RET_VOID(2); + StackType argType1 = (m_pStackPointer[-1].type == StackTypeO || m_pStackPointer[-1].type == StackTypeMP) ? StackTypeI : m_pStackPointer[-1].type; + StackType argType2 = (m_pStackPointer[-2].type == StackTypeO || m_pStackPointer[-2].type == StackTypeMP) ? 
StackTypeI : m_pStackPointer[-2].type; + + // Since branch opcodes only compare args of the same type, handle implicit conversions before + // emitting the conditional branch + if (argType1 == StackTypeI4 && argType2 == StackTypeI8) + { + EmitConv(m_pStackPointer - 1, m_pLastIns, StackTypeI8, INTOP_CONV_I8_I4); + argType1 = StackTypeI8; + } + else if (argType1 == StackTypeI8 && argType2 == StackTypeI4) + { + EmitConv(m_pStackPointer - 2, m_pLastIns, StackTypeI8, INTOP_CONV_I8_I4); + } + else if (argType1 == StackTypeR4 && argType2 == StackTypeR8) + { + EmitConv(m_pStackPointer - 1, m_pLastIns, StackTypeR8, INTOP_CONV_R8_R4); + argType1 = StackTypeR8; + } + else if (argType1 == StackTypeR8 && argType2 == StackTypeR4) + { + EmitConv(m_pStackPointer - 2, m_pLastIns, StackTypeR8, INTOP_CONV_R8_R4); + } + else if (argType1 != argType2) + { + m_hasInvalidCode = true; + return; + } + + // offset the opcode to obtain the type specific I4/I8/R4/R8 variant. + InterpOpcode opcodeArgType = (InterpOpcode)(opcode + argType1 - StackTypeI4); + m_pStackPointer -= 2; + + if (ilOffset) + { + EmitBranch(opcodeArgType, ilOffset + insSize); + m_pLastIns->SetSVars2(m_pStackPointer[0].var, m_pStackPointer[1].var); + } + else + { + AddIns(INTOP_NOP); + } +} + int InterpCompiler::GenerateCode(CORINFO_METHOD_INFO* methodInfo) { - m_ip = methodInfo->ILCode; - uint8_t *codeEnd = m_ip + methodInfo->ILCodeSize; + uint8_t *codeEnd; + bool emittedBBlocks, linkBBlocks, needsRetryEmit; + m_ip = m_pILCode = methodInfo->ILCode; + m_ILCodeSize = (int32_t)methodInfo->ILCodeSize; m_stackCapacity = methodInfo->maxStack + 1; m_pStackBase = m_pStackPointer = (StackInfo*)AllocTemporary(sizeof(StackInfo) * m_stackCapacity); - m_pCBB = m_pEntryBB = AllocBB(); + m_pEntryBB = AllocBB(); m_pEntryBB->ilOffset = 0; + m_pEntryBB->emitState = BBStateEmitting; + m_pEntryBB->stackHeight = 0; + m_pCBB = m_pEntryBB; if (!CreateBasicBlocks(methodInfo)) { - // FIXME error return for compilation failure m_hasInvalidCode = 
true; - goto exit; + goto exit_bad_code; } + codeEnd = m_ip + m_ILCodeSize; + + linkBBlocks = true; + needsRetryEmit = false; +retry_emit: + emittedBBlocks = false; while (m_ip < codeEnd) { + // Check here for every opcode to avoid code bloat + if (m_hasInvalidCode) + goto exit_bad_code; + + int32_t insOffset = (int32_t)(m_ip - m_pILCode); + InterpBasicBlock *pNewBB = m_ppOffsetToBB[insOffset]; + if (pNewBB != NULL && m_pCBB != pNewBB) + { + // If we were emitting into previous bblock, we are finished now + if (m_pCBB->emitState == BBStateEmitting) + m_pCBB->emitState = BBStateEmitted; + // If the new bblock was already emitted, skip its instructions + if (pNewBB->emitState == BBStateEmitted) + { + if (linkBBlocks) + { + LinkBBs(m_pCBB, pNewBB); + // Further emitting can only start at a point where the bblock is not fallthrough + linkBBlocks = false; + } + // If the bblock was fully emitted it means we already iterated at least once over + // all instructions so we have `pNextBB` initialized, unless it is the last bblock. + // Skip through all emitted bblocks. + m_pCBB = pNewBB; + while (m_pCBB->pNextBB && m_pCBB->pNextBB->emitState == BBStateEmitted) + m_pCBB = m_pCBB->pNextBB; + + if (m_pCBB->pNextBB) + m_ip = m_pILCode + m_pCBB->pNextBB->ilOffset; + else + m_ip = codeEnd; + + continue; + } + else + { + assert (pNewBB->emitState == BBStateNotEmitted); + } + // We are starting a new basic block. 
Change cbb and link them together + if (linkBBlocks) + { + // By default we link cbb with the new starting bblock, unless the previous + // instruction is an unconditional branch (BR, LEAVE, ENDFINALLY) + LinkBBs(m_pCBB, pNewBB); + EmitBBEndVarMoves(pNewBB); + pNewBB->emitState = BBStateEmitting; + emittedBBlocks = true; + if (pNewBB->stackHeight >= 0) + { + MergeStackTypeInfo(m_pStackBase, pNewBB->pStackState, pNewBB->stackHeight); + // This is relevant only for copying the vars associated with the values on the stack + memcpy(m_pStackBase, pNewBB->pStackState, pNewBB->stackHeight * sizeof(StackInfo)); + m_pStackPointer = m_pStackBase + pNewBB->stackHeight; + } + else + { + // This bblock has not been branched to yet. Initialize its stack state + InitBBStackState(pNewBB); + } + // linkBBlocks remains true, which is the default + } + else + { + if (pNewBB->stackHeight >= 0) + { + // This is relevant only for copying the vars associated with the values on the stack + memcpy (m_pStackBase, pNewBB->pStackState, pNewBB->stackHeight * sizeof(StackInfo)); + m_pStackPointer = m_pStackBase + pNewBB->stackHeight; + pNewBB->emitState = BBStateEmitting; + emittedBBlocks = true; + linkBBlocks = true; + } + else + { + assert(pNewBB->emitState == BBStateNotEmitted); + needsRetryEmit = true; + // linking to its next bblock, if its the case, will only happen + // after we actually emit the bblock + linkBBlocks = false; + // If we had pNewBB->pNextBB initialized, here we could skip to its il offset directly. + // We will just skip all instructions instead, since it doesn't seem that problematic. 
+ } + } + if (!m_pCBB->pNextBB) + m_pCBB->pNextBB = pNewBB; + m_pCBB = pNewBB; + } + + int32_t opcodeSize = CEEOpcodeSize(m_ip, codeEnd); + if (m_pCBB->emitState != BBStateEmitting) + { + // If we are not really emitting, just skip the instructions in the bblock + m_ip += opcodeSize; + continue; + } + + m_ppOffsetToBB[insOffset] = m_pCBB; + uint8_t opcode = *m_ip; switch (opcode) { @@ -1063,12 +1409,198 @@ int InterpCompiler::GenerateCode(CORINFO_METHOD_INFO* methodInfo) } m_ip++; break; + case CEE_SWITCH: + { + m_ip++; + uint32_t n = getU4LittleEndian(m_ip); + // Format of switch instruction is opcode + srcVal + n + T1 + T2 + ... + Tn + AddInsExplicit(INTOP_SWITCH, n + 3); + m_pLastIns->data[0] = n; + m_ip += 4; + const uint8_t *nextIp = m_ip + n * 4; + m_pStackPointer--; + m_pLastIns->SetSVar(m_pStackPointer->var); + InterpBasicBlock **targetBBTable = (InterpBasicBlock**)AllocMemPool(sizeof (InterpBasicBlock*) * n); + + for (uint32_t i = 0; i < n; i++) + { + int32_t offset = getU4LittleEndian(m_ip); + uint32_t target = (uint32_t)(nextIp - m_pILCode + offset); + InterpBasicBlock *targetBB = m_ppOffsetToBB[target]; + assert(targetBB); + + InitBBStackState(targetBB); + targetBBTable[i] = targetBB; + LinkBBs(m_pCBB, targetBB); + m_ip += 4; + } + m_pLastIns->info.ppTargetBBTable = targetBBTable; + break; + } + case CEE_BR: + { + int32_t offset = getI4LittleEndian(m_ip + 1); + if (offset) + { + EmitBranch(INTOP_BR, 5 + offset); + linkBBlocks = false; + } + m_ip += 5; + break; + } + case CEE_BR_S: + { + int32_t offset = (int8_t)m_ip [1]; + if (offset) + { + EmitBranch(INTOP_BR, 2 + (int8_t)m_ip [1]); + linkBBlocks = false; + } + m_ip += 2; + break; + } + case CEE_BRFALSE: + EmitOneArgBranch(INTOP_BRFALSE_I4, getI4LittleEndian(m_ip + 1), 5); + m_ip += 5; + break; + case CEE_BRFALSE_S: + EmitOneArgBranch(INTOP_BRFALSE_I4, (int8_t)m_ip [1], 2); + m_ip += 2; + break; + case CEE_BRTRUE: + EmitOneArgBranch(INTOP_BRTRUE_I4, getI4LittleEndian(m_ip + 1), 5); + m_ip += 5; + 
break; + case CEE_BRTRUE_S: + EmitOneArgBranch(INTOP_BRTRUE_I4, (int8_t)m_ip [1], 2); + m_ip += 2; + break; + case CEE_BEQ: + EmitTwoArgBranch(INTOP_BEQ_I4, getI4LittleEndian(m_ip + 1), 5); + m_ip += 5; + break; + case CEE_BEQ_S: + EmitTwoArgBranch(INTOP_BEQ_I4, (int8_t)m_ip [1], 2); + m_ip += 2; + break; + case CEE_BGE: + EmitTwoArgBranch(INTOP_BGE_I4, getI4LittleEndian(m_ip + 1), 5); + m_ip += 5; + break; + case CEE_BGE_S: + EmitTwoArgBranch(INTOP_BGE_I4, (int8_t)m_ip [1], 2); + m_ip += 2; + break; + case CEE_BGT: + EmitTwoArgBranch(INTOP_BGT_I4, getI4LittleEndian(m_ip + 1), 5); + m_ip += 5; + break; + case CEE_BGT_S: + EmitTwoArgBranch(INTOP_BGT_I4, (int8_t)m_ip [1], 2); + m_ip += 2; + break; + case CEE_BLT: + EmitTwoArgBranch(INTOP_BLT_I4, getI4LittleEndian(m_ip + 1), 5); + m_ip += 5; + break; + case CEE_BLT_S: + EmitTwoArgBranch(INTOP_BLT_I4, (int8_t)m_ip [1], 2); + m_ip += 2; + break; + case CEE_BLE: + EmitTwoArgBranch(INTOP_BLE_I4, getI4LittleEndian(m_ip + 1), 5); + m_ip += 5; + break; + case CEE_BLE_S: + EmitTwoArgBranch(INTOP_BLE_I4, (int8_t)m_ip [1], 2); + m_ip += 2; + break; + case CEE_BNE_UN: + EmitTwoArgBranch(INTOP_BNE_UN_I4, getI4LittleEndian(m_ip + 1), 5); + m_ip += 5; + break; + case CEE_BNE_UN_S: + EmitTwoArgBranch(INTOP_BNE_UN_I4, (int8_t)m_ip [1], 2); + m_ip += 2; + break; + case CEE_BGE_UN: + EmitTwoArgBranch(INTOP_BGE_UN_I4, getI4LittleEndian(m_ip + 1), 5); + m_ip += 5; + break; + case CEE_BGE_UN_S: + EmitTwoArgBranch(INTOP_BGE_UN_I4, (int8_t)m_ip [1], 2); + m_ip += 2; + break; + case CEE_BGT_UN: + EmitTwoArgBranch(INTOP_BGT_UN_I4, getI4LittleEndian(m_ip + 1), 5); + m_ip += 5; + break; + case CEE_BGT_UN_S: + EmitTwoArgBranch(INTOP_BGT_UN_I4, (int8_t)m_ip [1], 2); + m_ip += 2; + break; + case CEE_BLE_UN: + EmitTwoArgBranch(INTOP_BLE_UN_I4, getI4LittleEndian(m_ip + 1), 5); + m_ip += 5; + break; + case CEE_BLE_UN_S: + EmitTwoArgBranch(INTOP_BLE_UN_I4, (int8_t)m_ip [1], 2); + m_ip += 2; + break; + case CEE_BLT_UN: + 
EmitTwoArgBranch(INTOP_BLT_UN_I4, getI4LittleEndian(m_ip + 1), 5); + m_ip += 5; + break; + case CEE_BLT_UN_S: + EmitTwoArgBranch(INTOP_BLT_UN_I4, (int8_t)m_ip [1], 2); + m_ip += 2; + break; + default: assert(0); break; } } -exit: + if (m_pCBB->emitState == BBStateEmitting) + m_pCBB->emitState = BBStateEmitted; + + // If no bblocks were emitted during the last iteration, there is no point to try again + // Some bblocks are just unreachable in the code. + if (needsRetryEmit && emittedBBlocks) + { + m_ip = m_pILCode; + m_pCBB = m_pEntryBB; + + linkBBlocks = false; + needsRetryEmit = false; + goto retry_emit; + } + + UnlinkUnreachableBBlocks(); + return CORJIT_OK; +exit_bad_code: + return CORJIT_BADCODE; +} + +void InterpCompiler::UnlinkUnreachableBBlocks() +{ + // Unlink unreachable bblocks, prevBB is always an emitted bblock + InterpBasicBlock *prevBB = m_pEntryBB; + InterpBasicBlock *nextBB = prevBB->pNextBB; + while (nextBB != NULL) + { + if (nextBB->emitState == BBStateNotEmitted) + { + m_ppOffsetToBB[nextBB->ilOffset] = NULL; + prevBB->pNextBB = nextBB->pNextBB; + nextBB = prevBB->pNextBB; + } + else + { + prevBB = nextBB; + nextBB = nextBB->pNextBB; + } + } } diff --git a/src/coreclr/interpreter/compiler.h b/src/coreclr/interpreter/compiler.h index 092dd1831096d3..267150c2949247 100644 --- a/src/coreclr/interpreter/compiler.h +++ b/src/coreclr/interpreter/compiler.h @@ -5,6 +5,7 @@ #define _COMPILER_H_ #include "intops.h" +#include "datastructs.h" // Types that can exist on the IL execution stack. They are used only during // IL import compilation stage. 
@@ -100,12 +101,21 @@ struct InterpInst #define CALL_ARGS_SVAR -2 +struct StackInfo; + +enum InterpBBState +{ + BBStateNotEmitted, + BBStateEmitting, + BBStateEmitted +}; struct InterpBasicBlock { int32_t index; int32_t ilOffset, nativeOffset; int32_t stackHeight; + StackInfo *pStackState; InterpInst *pFirstIns, *pLastIns; InterpBasicBlock *pNextBB; @@ -113,12 +123,14 @@ struct InterpBasicBlock int inCount, outCount; InterpBasicBlock **ppInBBs; InterpBasicBlock **ppOutBBs; + + InterpBBState emitState; }; struct InterpVar { CORINFO_CLASS_HANDLE clsHnd; - InterpType mt; + InterpType interpType; int indirects; int offset; int size; @@ -148,6 +160,22 @@ struct StackInfo } }; +enum RelocType +{ + RelocLongBranch, + RelocSwitch +}; + +struct Reloc +{ + RelocType type; + // For branch relocation, how many sVar slots to skip + int skip; + // Base offset that the relative offset to be embedded in IR applies to + int32_t offset; + InterpBasicBlock *pTargetBB; +}; + typedef class ICorJitInfo* COMP_HANDLE; class InterpCompiler @@ -160,6 +188,8 @@ class InterpCompiler static int32_t InterpGetMovForType(InterpType interpType, bool signExtend); uint8_t* m_ip; + uint8_t* m_pILCode; + int32_t m_ILCodeSize; int GenerateCode(CORINFO_METHOD_INFO* methodInfo); @@ -198,12 +228,21 @@ class InterpCompiler void LinkBBs(InterpBasicBlock *from, InterpBasicBlock *to); void UnlinkBBs(InterpBasicBlock *from, InterpBasicBlock *to); + void EmitBranch(InterpOpcode opcode, int ilOffset); + void EmitOneArgBranch(InterpOpcode opcode, int ilOffset, int insSize); + void EmitTwoArgBranch(InterpOpcode opcode, int ilOffset, int insSize); + void AddConv(StackInfo *sp, InterpInst *prevIns, StackType type, InterpOpcode convOp); + + void EmitBBEndVarMoves(InterpBasicBlock *pTargetBB); + void InitBBStackState(InterpBasicBlock *pBB); + void UnlinkUnreachableBBlocks(); + // Vars InterpVar *m_pVars = NULL; int32_t m_varsSize = 0; int32_t m_varsCapacity = 0; - int32_t CreateVarExplicit(InterpType mt, 
CORINFO_CLASS_HANDLE clsHnd, int size); + int32_t CreateVarExplicit(InterpType interpType, CORINFO_CLASS_HANDLE clsHnd, int size); int32_t m_totalVarsStackSize = 0; int32_t m_paramAreaOffset = 0; @@ -232,7 +271,8 @@ class InterpCompiler void AllocOffsets(); int32_t ComputeCodeSize(); void EmitCode(); - int32_t* EmitCodeIns(int32_t *ip, InterpInst *pIns); + int32_t* EmitCodeIns(int32_t *ip, InterpInst *pIns, PtrArray *relocs); + void PatchRelocations(PtrArray *relocs); InterpMethod* CreateInterpMethod(); bool CreateBasicBlocks(CORINFO_METHOD_INFO* methodInfo); public: diff --git a/src/coreclr/interpreter/interpretershared.h b/src/coreclr/interpreter/interpretershared.h index f977aac32b8dbc..8982dc0c201deb 100644 --- a/src/coreclr/interpreter/interpretershared.h +++ b/src/coreclr/interpreter/interpretershared.h @@ -6,17 +6,11 @@ #ifndef _INTERPRETERSHARED_H_ #define _INTERPRETERSHARED_H_ +#include "intopsshared.h" + #define INTERP_STACK_SLOT_SIZE 8 // Alignment of each var offset on the interpreter stack #define INTERP_STACK_ALIGNMENT 16 // Alignment of interpreter stack at the start of a frame -#define OPDEF(a,b,c,d,e,f) a, -typedef enum -{ -#include "intops.def" - INTOP_LAST -} InterpOpcode; -#undef OPDEF - struct InterpMethod { CORINFO_METHOD_HANDLE methodHnd; diff --git a/src/coreclr/interpreter/intops.cpp b/src/coreclr/interpreter/intops.cpp index aaed470714b9a7..dc31e8f3af2de0 100644 --- a/src/coreclr/interpreter/intops.cpp +++ b/src/coreclr/interpreter/intops.cpp @@ -4,6 +4,7 @@ #include "intops.h" #include +#include // This, instead of an array of pointers, to optimize away a pointer and a relocation per string. 
struct InterpOpNameCharacters @@ -111,3 +112,71 @@ OPCODE CEEDecodeOpcode(const uint8_t **pIp) *pIp = ip; return res; }
+
+// Returns the encoded size in bytes of the IL instruction at ip, including the 0xFE
+// prefix of two-byte opcodes, or -1 if the instruction does not fit before codeEnd.
+int32_t CEEOpcodeSize(const uint8_t *ip, const uint8_t *codeEnd)
+{
+    // CEEDecodeOpcode reads the byte after a 0xFE prefix, so make sure it exists
+    if (ip >= codeEnd || (*ip == 0xFE && (codeEnd - ip) < 2))
+        return -1;
+
+    const uint8_t *p = ip;
+    OPCODE opcode = CEEDecodeOpcode(&p);
+    OPCODE_FORMAT opArgs = g_CEEOpArgs[opcode];
+
+    // size counts the opcode byte at p plus its operands; avail is what is left from p
+    size_t size = 0;
+    size_t avail = (size_t)(codeEnd - p);
+
+    switch (opArgs)
+    {
+    case InlineNone:
+        size = 1;
+        break;
+    case InlineString:
+    case InlineType:
+    case InlineField:
+    case InlineMethod:
+    case InlineTok:
+    case InlineSig:
+    case ShortInlineR:
+    case InlineI:
+    case InlineBrTarget:
+        size = 5;
+        break;
+    case InlineVar:
+        size = 3;
+        break;
+    case ShortInlineVar:
+    case ShortInlineI:
+    case ShortInlineBrTarget:
+        size = 2;
+        break;
+    case InlineR:
+    case InlineI8:
+        size = 9;
+        break;
+    case InlineSwitch: {
+        // The 4-byte case count has to be inside the IL stream before it is read
+        if (avail < 5)
+            return -1;
+        size_t entries = getU4LittleEndian(p + 1);
+        // Bound the count by the remaining bytes so that 4 * entries cannot overflow
+        if (entries > (avail - 5) / 4)
+            return -1;
+        size = 5 + 4 * entries;
+        break;
+    }
+    default:
+        assert(0);
+        // Unknown operand format: report failure instead of a zero-sized instruction
+        return -1;
+    }
+
+    // An instruction may end exactly at codeEnd; reject it only if it runs past the end
+    if (size > avail)
+        return -1;
+
+    return (int32_t)((p - ip) + size);
+}
diff --git a/src/coreclr/interpreter/intops.def b/src/coreclr/interpreter/intops.def index 293fa5282f99b8..08488c6e7eba9c 100644 --- a/src/coreclr/interpreter/intops.def +++ b/src/coreclr/interpreter/intops.def @@ -20,6 +20,57 @@ OPDEF(INTOP_MOV_4, "mov.4", 3, 1, 1, InterpOpNoArgs) OPDEF(INTOP_MOV_8, "mov.8", 3, 1, 1, InterpOpNoArgs) OPDEF(INTOP_MOV_VT, "mov.vt", 4, 1, 1, InterpOpInt) +OPDEF(INTOP_SWITCH, "switch", 0, 0, 1, InterpOpSwitch) + +OPDEF(INTOP_BR, "br", 2, 0, 0, InterpOpBranch) + +OPDEF(INTOP_BRFALSE_I4, "brfalse.i4", 3, 0, 1, InterpOpBranch) +OPDEF(INTOP_BRFALSE_I8, "brfalse.i8", 3, 0, 1, InterpOpBranch) +OPDEF(INTOP_BRTRUE_I4, "brtrue.i4", 3, 0, 1, InterpOpBranch) +OPDEF(INTOP_BRTRUE_I8, "brtrue.i8", 3, 0, 1, InterpOpBranch) + +OPDEF(INTOP_BEQ_I4, "beq.i4", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BEQ_I8, "beq.i8", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BEQ_R4, "beq.r4", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BEQ_R8, "beq.r8", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BGE_I4, "bge.i4", 4,
0, 2, InterpOpBranch) +OPDEF(INTOP_BGE_I8, "bge.i8", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BGE_R4, "bge.r4", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BGE_R8, "bge.r8", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BGT_I4, "bgt.i4", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BGT_I8, "bgt.i8", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BGT_R4, "bgt.r4", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BGT_R8, "bgt.r8", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BLT_I4, "blt.i4", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BLT_I8, "blt.i8", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BLT_R4, "blt.r4", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BLT_R8, "blt.r8", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BLE_I4, "ble.i4", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BLE_I8, "ble.i8", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BLE_R4, "ble.r4", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BLE_R8, "ble.r8", 4, 0, 2, InterpOpBranch) + +OPDEF(INTOP_BNE_UN_I4, "bne.un.i4", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BNE_UN_I8, "bne.un.i8", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BNE_UN_R4, "bne.un.r4", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BNE_UN_R8, "bne.un.r8", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BGE_UN_I4, "bge.un.i4", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BGE_UN_I8, "bge.un.i8", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BGE_UN_R4, "bge.un.r4", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BGE_UN_R8, "bge.un.r8", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BGT_UN_I4, "bgt.un.i4", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BGT_UN_I8, "bgt.un.i8", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BGT_UN_R4, "bgt.un.r4", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BGT_UN_R8, "bgt.un.r8", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BLE_UN_I4, "ble.un.i4", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BLE_UN_I8, "ble.un.i8", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BLE_UN_R4, "ble.un.r4", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BLE_UN_R8, "ble.un.r8", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BLT_UN_I4, "blt.un.i4", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BLT_UN_I8, "blt.un.i8", 4, 0, 2, InterpOpBranch) 
+OPDEF(INTOP_BLT_UN_R4, "blt.un.r4", 4, 0, 2, InterpOpBranch) +OPDEF(INTOP_BLT_UN_R8, "blt.un.r8", 4, 0, 2, InterpOpBranch) + OPDEF(INTOP_CONV_R_UN_I4, "conv.r.un.i4", 3, 1, 1, InterpOpNoArgs) OPDEF(INTOP_CONV_R_UN_I8, "conv.r.un.i8", 3, 1, 1, InterpOpNoArgs) diff --git a/src/coreclr/interpreter/intops.h b/src/coreclr/interpreter/intops.h index 3668e9a1eeb4fc..ad462c09761713 100644 --- a/src/coreclr/interpreter/intops.h +++ b/src/coreclr/interpreter/intops.h @@ -7,10 +7,14 @@ #include "openum.h" #include +#include "intopsshared.h" + typedef enum { InterpOpNoArgs, InterpOpInt, + InterpOpBranch, + InterpOpSwitch, } InterpOpArgType; extern const uint8_t g_interpOpLen[]; @@ -29,6 +33,7 @@ const char* InterpOpName(int op); extern OPCODE_FORMAT const g_CEEOpArgs[]; const char* CEEOpName(OPCODE op); OPCODE CEEDecodeOpcode(const uint8_t **ip); +int CEEOpcodeSize(const uint8_t *ip, const uint8_t *codeEnd); #ifdef TARGET_64BIT #define INTOP_MOV_P INTOP_MOV_8 @@ -36,6 +41,16 @@ OPCODE CEEDecodeOpcode(const uint8_t **ip); #define INTOP_MOV_P INTOP_MOV_4 #endif +static inline bool InterpOpIsUncondBranch(int32_t opcode) +{ + return opcode == INTOP_BR; +} + +static inline bool InterpOpIsCondBranch(int32_t opcode) +{ + return opcode >= INTOP_BRFALSE_I4 && opcode <= INTOP_BLT_UN_R8; +} + // Helpers identical to ones used by JIT // FIXME how to consume GET_UNALIGNED_VAL defines from pal as jit ??? // diff --git a/src/coreclr/interpreter/intopsshared.h b/src/coreclr/interpreter/intopsshared.h new file mode 100644 index 00000000000000..80be6d2f53d946 --- /dev/null +++ b/src/coreclr/interpreter/intopsshared.h @@ -0,0 +1,15 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +#ifndef _INTOPSSHARED_H_ +#define _INTOPSSHARED_H_ + +#define OPDEF(a,b,c,d,e,f) a, +typedef enum +{ +#include "intops.def" + INTOP_LAST +} InterpOpcode; +#undef OPDEF + +#endif diff --git a/src/coreclr/vm/interpexec.cpp b/src/coreclr/vm/interpexec.cpp index b47e28757af767..65c1651f17df76 100644 --- a/src/coreclr/vm/interpexec.cpp +++ b/src/coreclr/vm/interpexec.cpp @@ -201,13 +201,163 @@ void InterpExecMethod(InterpMethodContextFrame *pFrame, InterpThreadContext *pTh LOCAL_VAR(ip[1], double) = (double)LOCAL_VAR(ip[2], float); ip += 3; break; - case INTOP_CONV_U8_R4: case INTOP_CONV_U8_R8: // TODO assert(0); break; + case INTOP_SWITCH: + { + uint32_t val = LOCAL_VAR(ip[1], uint32_t); + uint32_t n = ip[2]; + ip += 3; + if (val < n) + { + ip += val; + ip += *ip; + } + else + { + ip += n; + } + break; + } + + case INTOP_BR: + ip += ip[1]; + break; + +#define BR_UNOP(datatype, op) \ + if (LOCAL_VAR(ip[1], datatype) op) \ + ip += ip[2]; \ + else \ + ip += 3; + + case INTOP_BRFALSE_I4: + BR_UNOP(int32_t, == 0); + break; + case INTOP_BRFALSE_I8: + BR_UNOP(int64_t, == 0); + break; + case INTOP_BRTRUE_I4: + BR_UNOP(int32_t, != 0); + break; + case INTOP_BRTRUE_I8: + BR_UNOP(int64_t, != 0); + break; + +#define BR_BINOP_COND(cond) \ + if (cond) \ + ip += ip[3]; \ + else \ + ip += 4; + +#define BR_BINOP(datatype, op) \ + BR_BINOP_COND(LOCAL_VAR(ip[1], datatype) op LOCAL_VAR(ip[2], datatype)) + + case INTOP_BEQ_I4: + BR_BINOP(int32_t, ==); + break; + case INTOP_BEQ_I8: + BR_BINOP(int64_t, ==); + break; + case INTOP_BEQ_R4: + case INTOP_BEQ_R8: + // TODO Floating point comparisons + assert(0); + break; + case INTOP_BGE_I4: + BR_BINOP(int32_t, >=); + break; + case INTOP_BGE_I8: + BR_BINOP(int64_t, >=); + break; + case INTOP_BGE_R4: + case INTOP_BGE_R8: + assert(0); + break; + case INTOP_BGT_I4: + BR_BINOP(int32_t, >); + break; + case INTOP_BGT_I8: + BR_BINOP(int64_t, >); + break; + case INTOP_BGT_R4: + case INTOP_BGT_R8: + assert(0); + break; + case INTOP_BLT_I4: + 
BR_BINOP(int32_t, <); + break; + case INTOP_BLT_I8: + BR_BINOP(int64_t, <); + break; + case INTOP_BLT_R4: + case INTOP_BLT_R8: + assert(0); + break; + case INTOP_BLE_I4: + BR_BINOP(int32_t, <=); + break; + case INTOP_BLE_I8: + BR_BINOP(int64_t, <=); + break; + case INTOP_BLE_R4: + case INTOP_BLE_R8: + assert(0); + break; + case INTOP_BNE_UN_I4: + BR_BINOP(uint32_t, !=); + break; + case INTOP_BNE_UN_I8: + BR_BINOP(uint64_t, !=); + break; + case INTOP_BNE_UN_R4: + case INTOP_BNE_UN_R8: + assert(0); + break; + case INTOP_BGE_UN_I4: + BR_BINOP(uint32_t, >=); + break; + case INTOP_BGE_UN_I8: + BR_BINOP(uint64_t, >=); + break; + case INTOP_BGE_UN_R4: + case INTOP_BGE_UN_R8: + assert(0); + break; + case INTOP_BGT_UN_I4: + BR_BINOP(uint32_t, >); + break; + case INTOP_BGT_UN_I8: + BR_BINOP(uint64_t, >); + break; + case INTOP_BGT_UN_R4: + case INTOP_BGT_UN_R8: + assert(0); + break; + case INTOP_BLE_UN_I4: + BR_BINOP(uint32_t, <=); + break; + case INTOP_BLE_UN_I8: + BR_BINOP(uint64_t, <=); + break; + case INTOP_BLE_UN_R4: + case INTOP_BLE_UN_R8: + assert(0); + break; + case INTOP_BLT_UN_I4: + BR_BINOP(uint32_t, <); + break; + case INTOP_BLT_UN_I8: + BR_BINOP(uint64_t, <); + break; + case INTOP_BLT_UN_R4: + case INTOP_BLT_UN_R8: + assert(0); + break; + default: assert(0); break; From 9026daf45a346eaac1dda7dda3d3a97e778cc7c5 Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Wed, 19 Feb 2025 21:37:37 +0200 Subject: [PATCH 07/16] Add support for ldarg + ldloc Add a new InterpTypeByRef, to be used to precisely track vars that might contain interior pointers and need to be reported to the GC. We didn't have this type on mono. 
--- src/coreclr/interpreter/compiler.cpp | 288 +++++++++++++++++++++++++-- src/coreclr/interpreter/compiler.h | 17 +- 2 files changed, 289 insertions(+), 16 deletions(-) diff --git a/src/coreclr/interpreter/compiler.cpp b/src/coreclr/interpreter/compiler.cpp index 1a80a235a09945..bb97c027ae87bb 100644 --- a/src/coreclr/interpreter/compiler.cpp +++ b/src/coreclr/interpreter/compiler.cpp @@ -13,19 +13,20 @@ static const StackType g_stackTypeFromInterpType[] = StackTypeR4, // R4 StackTypeR8, // R8 StackTypeO, // O - StackTypeVT // VT + StackTypeVT, // VT + StackTypeMP, // ByRef }; static const InterpType g_interpTypeFromStackType[] = { - InterpTypeI4, // I4, - InterpTypeI8, // I8, - InterpTypeR4, // R4, - InterpTypeR8, // R8, - InterpTypeO, // O, - InterpTypeVT, // VT, - InterpTypeI, // MP, - InterpTypeI, // F + InterpTypeI4, // I4, + InterpTypeI8, // I8, + InterpTypeR4, // R4, + InterpTypeR8, // R8, + InterpTypeO, // O, + InterpTypeVT, // VT, + InterpTypeByRef, // MP, + InterpTypeI, // F }; // FIXME Use specific allocators for their intended purpose @@ -370,6 +371,7 @@ int32_t InterpCompiler::InterpGetMovForType(InterpType interpType, bool signExte case InterpTypeR8: return INTOP_MOV_8; case InterpTypeO: + case InterpTypeByRef: return INTOP_MOV_P; case InterpTypeVT: return INTOP_MOV_VT; @@ -518,12 +520,17 @@ void InterpCompiler::PushTypeExplicit(StackType stackType, CORINFO_CLASS_HANDLE m_pStackPointer++; } -void InterpCompiler::PushType(StackType stackType, CORINFO_CLASS_HANDLE clsHnd) +void InterpCompiler::PushStackType(StackType stackType, CORINFO_CLASS_HANDLE clsHnd) { // We don't really care about the exact size for non-valuetypes PushTypeExplicit(stackType, clsHnd, INTERP_STACK_SLOT_SIZE); } +void InterpCompiler::PushInterpType(InterpType interpType, CORINFO_CLASS_HANDLE clsHnd) +{ + PushStackType(g_stackTypeFromInterpType[interpType], clsHnd); +} + void InterpCompiler::PushTypeVT(CORINFO_CLASS_HANDLE clsHnd, int size) { PushTypeExplicit(StackTypeVT, clsHnd, 
size); @@ -690,6 +697,8 @@ InterpCompiler::InterpCompiler(COMP_HANDLE compHnd, InterpMethod* InterpCompiler::CompileMethod() { + CreateILVars(); + GenerateCode(m_methodInfo); AllocOffsets(); @@ -715,6 +724,153 @@ void InterpCompiler::EmitConv(StackInfo *sp, InterpInst *prevIns, StackType type newInst->SetDVar(var); } +static InterpType GetInterpType(CorInfoType corInfoType) +{ + switch (corInfoType) + { + case CORINFO_TYPE_BYTE: + return InterpTypeI1; + case CORINFO_TYPE_UBYTE: + case CORINFO_TYPE_BOOL: + return InterpTypeU1; + case CORINFO_TYPE_CHAR: + case CORINFO_TYPE_USHORT: + return InterpTypeU2; + case CORINFO_TYPE_SHORT: + return InterpTypeI2; + case CORINFO_TYPE_INT: + case CORINFO_TYPE_UINT: + return InterpTypeI4; + case CORINFO_TYPE_LONG: + case CORINFO_TYPE_ULONG: + return InterpTypeI8; + case CORINFO_TYPE_NATIVEINT: + case CORINFO_TYPE_NATIVEUINT: + return InterpTypeI; + case CORINFO_TYPE_FLOAT: + return InterpTypeR4; + case CORINFO_TYPE_DOUBLE: + return InterpTypeR8; + case CORINFO_TYPE_STRING: + case CORINFO_TYPE_CLASS: + return InterpTypeO; + case CORINFO_TYPE_PTR: + return InterpTypeI; + case CORINFO_TYPE_BYREF: + return InterpTypeByRef; + case CORINFO_TYPE_VALUECLASS: + case CORINFO_TYPE_REFANY: + return InterpTypeVT; + default: + assert(0); + break; + } + return InterpTypeVoid; +} + +int32_t InterpCompiler::GetInterpTypeSize(CORINFO_CLASS_HANDLE clsHnd, InterpType interpType, int32_t *pAlign) +{ + int32_t size, align; + if (interpType == InterpTypeVT) + { + size = m_compHnd->getClassSize(clsHnd); + align = m_compHnd->getClassAlignmentRequirement(clsHnd); + + assert(align <= INTERP_STACK_ALIGNMENT); + + // All vars are stored at 8 byte aligned offsets + if (align < INTERP_STACK_SLOT_SIZE) + align = INTERP_STACK_SLOT_SIZE; + } + else + { + size = INTERP_STACK_SLOT_SIZE; // not really + align = INTERP_STACK_SLOT_SIZE; + } + *pAlign = align; + return size; +} + + +void InterpCompiler::CreateILVars() +{ + bool hasThis = m_methodInfo->args.hasThis(); + 
int32_t offset, size, align; + int numArgs = hasThis + m_methodInfo->args.numArgs; + int numILLocals = m_methodInfo->locals.numArgs; + int numILVars = numArgs + numILLocals; + + // add some starting extra space for new vars + m_varsCapacity = numILVars + 64; + m_pVars = (InterpVar*)AllocTemporary(m_varsCapacity * sizeof (InterpVar)); + m_varsSize = numILVars; + + offset = 0; + + CORINFO_ARG_LIST_HANDLE sigArg = m_methodInfo->args.args; + for (int i = 0; i < numArgs; i++) { + InterpType interpType; + CORINFO_CLASS_HANDLE argClass; + if (hasThis && i == 0) + { + argClass = m_compHnd->getMethodClass(m_methodInfo->ftn); + if (m_compHnd->isValueClass(argClass)) + interpType = InterpTypeByRef; + else + interpType = InterpTypeO; + } + else + { + CorInfoType argCorType; + argCorType = strip(m_compHnd->getArgType(&m_methodInfo->args, sigArg, &argClass)); + interpType = GetInterpType(argCorType); + sigArg = m_compHnd->getArgNext(sigArg); + } + + m_pVars[i].interpType = interpType; + m_pVars[i].clsHnd = argClass; + m_pVars[i].global = true; + m_pVars[i].ILGlobal = true; + m_pVars[i].indirects = 0; + + size = GetInterpTypeSize(argClass, interpType, &align); + m_pVars[i].size = size; + offset = ALIGN_UP_TO(offset, align); + m_pVars[i].offset = offset; + offset += size; + } + + offset = ALIGN_UP_TO(offset, INTERP_STACK_ALIGNMENT); + + sigArg = m_methodInfo->locals.args; + m_ILLocalsOffset = offset; + for (int i = 0; i < numILLocals; i++) { + int index = numArgs + i; + InterpType interpType; + CORINFO_CLASS_HANDLE argClass; + + CorInfoType argCorType = strip(m_compHnd->getArgType(&m_methodInfo->locals, sigArg, &argClass)); + interpType = GetInterpType(argCorType); + + m_pVars[index].interpType = interpType; + m_pVars[index].clsHnd = argClass; + m_pVars[index].global = true; + m_pVars[index].ILGlobal = true; + m_pVars[index].indirects = 0; + + size = GetInterpTypeSize(argClass, interpType, &align); + m_pVars[index].size = size; + offset = ALIGN_UP_TO(offset, align); + 
m_pVars[index].offset = offset; + offset += size; + sigArg = m_compHnd->getArgNext(sigArg); + } + offset = ALIGN_UP_TO(offset, INTERP_STACK_ALIGNMENT); + + m_ILLocalsSize = offset - m_ILLocalsOffset; + m_totalVarsStackSize = offset; +} + bool InterpCompiler::CreateBasicBlocks(CORINFO_METHOD_INFO* methodInfo) { int32_t codeSize = methodInfo->ILCodeSize; @@ -912,9 +1068,52 @@ void InterpCompiler::EmitTwoArgBranch(InterpOpcode opcode, int32_t ilOffset, int } } + +void InterpCompiler::EmitLoadVar(int32_t var) +{ + InterpType interpType = m_pVars[var].interpType; + int32_t size = m_pVars[var].size; + CORINFO_CLASS_HANDLE clsHnd = m_pVars[var].clsHnd; + + if (interpType == InterpTypeVT) + PushTypeVT(clsHnd, size); + else + PushInterpType(interpType, clsHnd); + + AddIns(InterpGetMovForType(interpType, true)); + m_pLastIns->SetSVar(var); + m_pLastIns->SetDVar(m_pStackPointer[-1].var); + if (interpType == InterpTypeVT) + m_pLastIns->data[0] = size; +} + +void InterpCompiler::EmitStoreVar(int32_t var) +{ + InterpType interpType = m_pVars[var].interpType; + CHECK_STACK_RET_VOID(1); + +#ifdef TARGET_64BIT + // nint and int32 can be used interchangeably. Add implicit conversions. 
+ if (m_pStackPointer[-1].type == StackTypeI4 && g_stackTypeFromInterpType[interpType] == StackTypeI8) + EmitConv(m_pStackPointer - 1, NULL, StackTypeI8, INTOP_CONV_I8_I4); +#endif + if (m_pStackPointer[-1].type == StackTypeR4 && g_stackTypeFromInterpType[interpType] == StackTypeR8) + EmitConv(m_pStackPointer - 1, NULL, StackTypeR8, INTOP_CONV_R8_R4); + else if (m_pStackPointer[-1].type == StackTypeR8 && g_stackTypeFromInterpType[interpType] == StackTypeR4) + EmitConv(m_pStackPointer - 1, NULL, StackTypeR4, INTOP_CONV_R4_R8); + + m_pStackPointer--; + AddIns(InterpGetMovForType(interpType, false)); + m_pLastIns->SetSVar(m_pStackPointer[0].var); + m_pLastIns->SetDVar(var); + if (interpType == InterpTypeVT) + m_pLastIns->data[0] = m_pVars[var].size; +} + int InterpCompiler::GenerateCode(CORINFO_METHOD_INFO* methodInfo) { uint8_t *codeEnd; + int numArgs = m_methodInfo->args.hasThis() + m_methodInfo->args.numArgs; bool emittedBBlocks, linkBBlocks, needsRetryEmit; m_ip = m_pILCode = methodInfo->ILCode; m_ILCodeSize = (int32_t)methodInfo->ILCodeSize; @@ -1058,17 +1257,56 @@ int InterpCompiler::GenerateCode(CORINFO_METHOD_INFO* methodInfo) case CEE_LDC_I4_8: AddIns(INTOP_LDC_I4); m_pLastIns->data[0] = opcode - CEE_LDC_I4_0; - PushType(StackTypeI4, NULL); + PushStackType(StackTypeI4, NULL); m_pLastIns->SetDVar(m_pStackPointer[-1].var); m_ip++; break; case CEE_LDC_I4_S: AddIns(INTOP_LDC_I4); m_pLastIns->data[0] = (int8_t)m_ip[1]; - PushType(StackTypeI4, NULL); + PushStackType(StackTypeI4, NULL); m_pLastIns->SetDVar(m_pStackPointer[-1].var); m_ip += 2; break; + + case CEE_LDARG_S: + EmitLoadVar(m_ip[1]); + m_ip += 2; + break; + case CEE_LDARG_0: + case CEE_LDARG_1: + case CEE_LDARG_2: + case CEE_LDARG_3: + EmitLoadVar(*m_ip - CEE_LDARG_0); + m_ip++; + break; + case CEE_STARG_S: + EmitStoreVar(m_ip[1]); + m_ip += 2; + break; + case CEE_LDLOC_S: + EmitLoadVar(numArgs + m_ip[1]); + m_ip += 2; + break; + case CEE_LDLOC_0: + case CEE_LDLOC_1: + case CEE_LDLOC_2: + case 
CEE_LDLOC_3: + EmitLoadVar(numArgs + *m_ip - CEE_LDLOC_0); + m_ip++; + break; + case CEE_STLOC_S: + EmitStoreVar(numArgs + m_ip[1]); + m_ip += 2; + break; + case CEE_STLOC_0: + case CEE_STLOC_1: + case CEE_STLOC_2: + case CEE_STLOC_3: + EmitStoreVar(numArgs + *m_ip - CEE_STLOC_0); + m_ip++; + break; + case CEE_RET: { CORINFO_SIG_INFO sig = methodInfo->args; @@ -1556,6 +1794,32 @@ int InterpCompiler::GenerateCode(CORINFO_METHOD_INFO* methodInfo) m_ip += 2; break; + case CEE_PREFIX1: + m_ip++; + switch (*m_ip + 256) + { + case CEE_LDARG: + EmitLoadVar(getU2LittleEndian(m_ip + 1)); + m_ip += 3; + break; + case CEE_STARG: + EmitStoreVar(getU2LittleEndian(m_ip + 1)); + m_ip += 3; + break; + case CEE_LDLOC: + EmitLoadVar(numArgs + getU2LittleEndian(m_ip + 1)); + m_ip += 3; + break; + case CEE_STLOC: + EmitStoreVar(numArgs + getU2LittleEndian(m_ip + 1));\ + m_ip += 3; + break; + default: + assert(0); + break; + } + break; + default: assert(0); break; diff --git a/src/coreclr/interpreter/compiler.h b/src/coreclr/interpreter/compiler.h index 267150c2949247..31af3123979b0f 100644 --- a/src/coreclr/interpreter/compiler.h +++ b/src/coreclr/interpreter/compiler.h @@ -38,7 +38,8 @@ enum InterpType { InterpTypeR8, InterpTypeO, InterpTypeVT, - InterpTypeVOID, + InterpTypeByRef, + InterpTypeVoid, #ifdef TARGET_64BIT InterpTypeI = InterpTypeI8 #else @@ -137,6 +138,9 @@ struct InterpVar // live_start and live_end are used by the offset allocator int liveStart; int liveEnd; + + unsigned int global : 1; // Dedicated stack offset throughout method execution + unsigned int ILGlobal : 1; // Args and IL locals }; struct StackInfo @@ -231,7 +235,6 @@ class InterpCompiler void EmitBranch(InterpOpcode opcode, int ilOffset); void EmitOneArgBranch(InterpOpcode opcode, int ilOffset, int insSize); void EmitTwoArgBranch(InterpOpcode opcode, int ilOffset, int insSize); - void AddConv(StackInfo *sp, InterpInst *prevIns, StackType type, InterpOpcode convOp); void EmitBBEndVarMoves(InterpBasicBlock 
*pTargetBB); void InitBBStackState(InterpBasicBlock *pBB); @@ -244,11 +247,14 @@ class InterpCompiler int32_t CreateVarExplicit(InterpType interpType, CORINFO_CLASS_HANDLE clsHnd, int size); - int32_t m_totalVarsStackSize = 0; + int32_t m_totalVarsStackSize; int32_t m_paramAreaOffset = 0; + int32_t m_ILLocalsOffset, m_ILLocalsSize; void AllocVarOffsetCB(int *pVar, void *pData); int32_t AllocVarOffset(int var, int32_t *pPos); + int32_t GetInterpTypeSize(CORINFO_CLASS_HANDLE clsHnd, InterpType interpType, int32_t *pAlign); + void CreateILVars(); // Stack StackInfo *m_pStackPointer, *m_pStackBase; @@ -258,11 +264,14 @@ class InterpCompiler bool CheckStackHelper(int n); void EnsureStack(int additional); void PushTypeExplicit(StackType stackType, CORINFO_CLASS_HANDLE clsHnd, int size); - void PushType(StackType stackType, CORINFO_CLASS_HANDLE clsHnd); + void PushStackType(StackType stackType, CORINFO_CLASS_HANDLE clsHnd); + void PushInterpType(InterpType interpType, CORINFO_CLASS_HANDLE clsHnd); void PushTypeVT(CORINFO_CLASS_HANDLE clsHnd, int size); // Code emit void EmitConv(StackInfo *sp, InterpInst *prevIns, StackType type, InterpOpcode convOp); + void EmitLoadVar(int var); + void EmitStoreVar(int var); // Passes int32_t* m_pMethodCode; From c3bc35753668b52c36b821366713785bc5796326 Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Sat, 22 Feb 2025 14:31:56 +0200 Subject: [PATCH 08/16] Add binary and unary operators --- src/coreclr/interpreter/compiler.cpp | 246 +++++++++++++++++++++++++++ src/coreclr/interpreter/compiler.h | 4 + src/coreclr/interpreter/intops.def | 70 ++++++++ src/coreclr/vm/interpexec.cpp | 214 ++++++++++++++++++++++- 4 files changed, 533 insertions(+), 1 deletion(-) diff --git a/src/coreclr/interpreter/compiler.cpp b/src/coreclr/interpreter/compiler.cpp index bb97c027ae87bb..a73ecfee4b9c82 100644 --- a/src/coreclr/interpreter/compiler.cpp +++ b/src/coreclr/interpreter/compiler.cpp @@ -498,6 +498,12 @@ void InterpCompiler::EnsureStack(int 
additional)
             return ret; \
     } while (0)
 
+#define INVALID_CODE_RET_VOID /* flag invalid IL, then return (void emitters) */ \
+    do { \
+        m_hasInvalidCode = true; \
+        return; \
+    } while (0)
+
 bool InterpCompiler::CheckStackHelper(int n)
 {
     int32_t currentSize = (int32_t)(m_pStackPointer - m_pStackBase);
@@ -1110,6 +1116,212 @@ void InterpCompiler::EmitStoreVar(int32_t var)
         m_pLastIns->data[0] = m_pVars[var].size;
 }
 
+// Emits a binary arithmetic or bitwise instruction for the two values on top of the IL
+// stack and pushes the result. opBase is the _I4 opcode of the family (e.g. INTOP_ADD_I4);
+// the emitted variant is opBase offset by the result stack type. Mixed operands are widened
+// first (I4 to I8 on 64-bit targets, R4 to R8); add also takes a managed pointer plus an
+// integer, and sub a managed pointer minus an integer or another managed pointer. Operand
+// types with no matching variant flag the method as invalid IL and nothing is pushed.
+void InterpCompiler::EmitBinaryArithmeticOp(int32_t opBase)
+{
+    CHECK_STACK_RET_VOID(2);
+    StackType type1 = m_pStackPointer[-2].type;
+    StackType type2 = m_pStackPointer[-1].type;
+
+    StackType typeRes;
+
+    if (opBase == INTOP_ADD_I4 && (type1 == StackTypeMP || type2 == StackTypeMP))
+    {
+        if (type1 == type2)
+            INVALID_CODE_RET_VOID;
+        if (type1 == StackTypeMP)
+        {
+            if (type2 == StackTypeI4)
+            {
+#ifdef TARGET_64BIT
+                EmitConv(m_pStackPointer - 1, NULL, StackTypeI8, INTOP_CONV_I8_I4);
+                type2 = StackTypeI8;
+#endif
+                typeRes = StackTypeMP;
+            }
+            else if (type2 == StackTypeI)
+            {
+                typeRes = StackTypeMP;
+            }
+            else
+            {
+                INVALID_CODE_RET_VOID;
+            }
+        }
+        else
+        {
+            // type2 == StackTypeMP
+            if (type1 == StackTypeI4)
+            {
+#ifdef TARGET_64BIT
+                EmitConv(m_pStackPointer - 2, NULL, StackTypeI8, INTOP_CONV_I8_I4);
+                type1 = StackTypeI8;
+#endif
+                typeRes = StackTypeMP;
+            }
+            else if (type1 == StackTypeI)
+            {
+                typeRes = StackTypeMP;
+            }
+            else
+            {
+                INVALID_CODE_RET_VOID;
+            }
+        }
+    }
+    else if (opBase == INTOP_SUB_I4 && type1 == StackTypeMP)
+    {
+        if (type2 == StackTypeI4)
+        {
+#ifdef TARGET_64BIT
+            EmitConv(m_pStackPointer - 1, NULL, StackTypeI8, INTOP_CONV_I8_I4);
+            type2 = StackTypeI8;
+#endif
+            typeRes = StackTypeMP;
+        }
+        else if (type2 == StackTypeI)
+        {
+            typeRes = StackTypeMP;
+        }
+        else if (type2 == StackTypeMP)
+        {
+            typeRes = StackTypeI;
+        }
+        else
+        {
+            INVALID_CODE_RET_VOID;
+        }
+    }
+    else
+    {
+#ifdef TARGET_64BIT
+        if (type1 == StackTypeI8 && type2 == StackTypeI4)
+        {
+            EmitConv(m_pStackPointer - 1, NULL, StackTypeI8, INTOP_CONV_I8_I4);
+            type2 = StackTypeI8;
+        }
+        else if (type1 == StackTypeI4 && type2 == StackTypeI8)
+        {
+            EmitConv(m_pStackPointer - 2, NULL, StackTypeI8, INTOP_CONV_I8_I4);
+            type1 = StackTypeI8;
+        }
+#endif
+        if (type1 == StackTypeR8 && type2 == StackTypeR4)
+        {
+            EmitConv(m_pStackPointer - 1, NULL, StackTypeR8, INTOP_CONV_R8_R4);
+            type2 = StackTypeR8;
+        }
+        else if (type1 == StackTypeR4 && type2 == StackTypeR8)
+        {
+            EmitConv(m_pStackPointer - 2, NULL, StackTypeR8, INTOP_CONV_R8_R4);
+            type1 = StackTypeR8;
+        }
+        if (type1 != type2)
+            INVALID_CODE_RET_VOID;
+
+        typeRes = type1;
+    }
+
+    // Only I4/I8/R4/R8 (and managed pointers, which use the native int variant) have an
+    // opcode variant, and and/or/xor exist for integers only. Reject everything else so the
+    // offset below cannot select an opcode from a neighbouring family.
+    bool hasIntVariant = (typeRes == StackTypeI4 || typeRes == StackTypeI8 || typeRes == StackTypeMP);
+    bool hasFloatVariant = (typeRes == StackTypeR4 || typeRes == StackTypeR8);
+    bool isBitwise = (opBase == INTOP_AND_I4 || opBase == INTOP_OR_I4 || opBase == INTOP_XOR_I4);
+    if (!hasIntVariant && (!hasFloatVariant || isBitwise))
+        INVALID_CODE_RET_VOID;
+
+    // The argument opcode is for the base _I4 instruction. Depending on the type of the result
+    // we compute the specific variant, _I4/_I8/_R4 or R8.
+    int32_t typeOffset = ((typeRes == StackTypeMP) ? StackTypeI : typeRes) - StackTypeI4;
+    int32_t finalOpcode = opBase + typeOffset;
+
+    m_pStackPointer -= 2;
+    AddIns(finalOpcode);
+    m_pLastIns->SetSVars2(m_pStackPointer[0].var, m_pStackPointer[1].var);
+    PushStackType(typeRes, NULL);
+    m_pLastIns->SetDVar(m_pStackPointer[-1].var);
+}
+
+// Emits a unary instruction (neg or not family) for the value on top of the IL stack and
+// pushes the result. opBase is the _I4 opcode of the family; the emitted variant is opBase
+// offset by the operand stack type.
+void InterpCompiler::EmitUnaryArithmeticOp(int32_t opBase)
+{
+    CHECK_STACK_RET_VOID(1);
+    StackType stackType = m_pStackPointer[-1].type;
+    bool isInteger = (stackType == StackTypeI4 || stackType == StackTypeI8);
+    bool isFloat = (stackType == StackTypeR4 || stackType == StackTypeR8);
+
+    // Only I4/I8/R4/R8 have an opcode variant; any other stack type would offset opBase
+    // into an unrelated opcode. Bitwise not is further restricted to integers.
+    if (!isInteger && !isFloat)
+        INVALID_CODE_RET_VOID;
+    if (opBase == INTOP_NOT_I4 && !isInteger)
+        INVALID_CODE_RET_VOID;
+
+    int32_t finalOpcode = opBase + (stackType - StackTypeI4);
+    m_pStackPointer--;
+    AddIns(finalOpcode);
+    m_pLastIns->SetSVar(m_pStackPointer[0].var);
+    PushStackType(stackType, NULL);
+    m_pLastIns->SetDVar(m_pStackPointer[-1].var);
+}
+
+// Emits a shift instruction. The value being shifted (second from the top of the IL stack)
+// must be I4 or I8 and selects the opcode variant relative to opBase; the shift amount (top
+// of stack) must be I4 or native int. Other operand types flag the method as invalid IL.
+void InterpCompiler::EmitShiftOp(int32_t opBase)
+{
+    CHECK_STACK_RET_VOID(2);
+    StackType stackType = m_pStackPointer[-2].type;
+    StackType shiftAmountType = m_pStackPointer[-1].type;
+    int32_t typeOffset = stackType - StackTypeI4;
+    int32_t finalOpcode = opBase + typeOffset;
+
+    if ((stackType != StackTypeI4 && stackType != StackTypeI8) ||
+        (shiftAmountType != StackTypeI4 && shiftAmountType != StackTypeI))
+        INVALID_CODE_RET_VOID;
+
+    m_pStackPointer -= 2;
+    AddIns(finalOpcode);
+    m_pLastIns->SetSVars2(m_pStackPointer[0].var, m_pStackPointer[1].var);
+    PushStackType(stackType, NULL);
+    m_pLastIns->SetDVar(m_pStackPointer[-1].var);
+}
+
+// Emits a comparison from the ceq/cgt/clt families for the two values on top of the IL stack
+// and pushes an I4 result. Object references and managed pointers use the native int variant
+// of opBase; otherwise the variant follows the top operand's stack type, with R4/R8 mixes
+// widened to R8 first.
+// NOTE(review): I4/I8 mixes are not widened here and the operand types are not validated -
+// confirm whether that is intended.
+void InterpCompiler::EmitCompareOp(int32_t opBase)
+{
+    CHECK_STACK_RET_VOID(2);
+    if (m_pStackPointer[-1].type == StackTypeO || m_pStackPointer[-1].type == StackTypeMP)
+    {
+        AddIns(opBase + StackTypeI - StackTypeI4);
+    }
+    else
+    {
+        if (m_pStackPointer[-1].type == StackTypeR4 && m_pStackPointer[-2].type == StackTypeR8)
+            EmitConv(m_pStackPointer - 1, NULL, StackTypeR8, INTOP_CONV_R8_R4);
+        if (m_pStackPointer[-1].type == StackTypeR8 && m_pStackPointer[-2].type == StackTypeR4)
+            EmitConv(m_pStackPointer - 2, NULL, StackTypeR8, INTOP_CONV_R8_R4);
+        AddIns(opBase + m_pStackPointer[-1].type - StackTypeI4);
+    }
+    m_pStackPointer -= 2;
+    m_pLastIns->SetSVars2(m_pStackPointer[0].var, m_pStackPointer[1].var);
+    PushStackType(StackTypeI4, NULL);
+    m_pLastIns->SetDVar(m_pStackPointer[-1].var);
+}
+
 int InterpCompiler::GenerateCode(CORINFO_METHOD_INFO* methodInfo)
 {
     uint8_t *codeEnd;
@@ -1794,6 +2006,51 @@ int InterpCompiler::GenerateCode(CORINFO_METHOD_INFO* methodInfo)
                 m_ip += 2;
                 break;
 
+            case CEE_ADD:
+                EmitBinaryArithmeticOp(INTOP_ADD_I4);
+                m_ip++;
+                break;
+            case CEE_SUB:
+                EmitBinaryArithmeticOp(INTOP_SUB_I4);
+                m_ip++;
+                break;
+            case CEE_MUL:
+                EmitBinaryArithmeticOp(INTOP_MUL_I4);
+                m_ip++;
+                break;
+            case CEE_AND:
+                EmitBinaryArithmeticOp(INTOP_AND_I4);
+                m_ip++;
+                break;
+            case CEE_OR:
+                EmitBinaryArithmeticOp(INTOP_OR_I4);
+                m_ip++;
+                break;
+            case CEE_XOR:
+                EmitBinaryArithmeticOp(INTOP_XOR_I4);
+                m_ip++;
+                break;
+            case CEE_SHL:
+                EmitShiftOp(INTOP_SHL_I4);
+                m_ip++;
+                break;
+            case CEE_SHR:
+                EmitShiftOp(INTOP_SHR_I4);
+                m_ip++;
+                break;
+            case CEE_SHR_UN:
+                EmitShiftOp(INTOP_SHR_UN_I4);
+                m_ip++;
+                break;
+            case CEE_NEG:
+                EmitUnaryArithmeticOp(INTOP_NEG_I4);
+                m_ip++;
+                break;
+            case CEE_NOT:
+
EmitUnaryArithmeticOp(INTOP_NOT_I4); + m_ip++; + break; + case CEE_PREFIX1: m_ip++; switch (*m_ip + 256) @@ -1814,6 +2040,26 @@ int InterpCompiler::GenerateCode(CORINFO_METHOD_INFO* methodInfo) EmitStoreVar(numArgs + getU2LittleEndian(m_ip + 1));\ m_ip += 3; break; + case CEE_CEQ: + EmitCompareOp(INTOP_CEQ_I4); + m_ip++; + break; + case CEE_CGT: + EmitCompareOp(INTOP_CGT_I4); + m_ip++; + break; + case CEE_CGT_UN: + EmitCompareOp(INTOP_CGT_UN_I4); + m_ip++; + break; + case CEE_CLT: + EmitCompareOp(INTOP_CLT_I4); + m_ip++; + break; + case CEE_CLT_UN: + EmitCompareOp(INTOP_CLT_UN_I4); + m_ip++; + break; default: assert(0); break; diff --git a/src/coreclr/interpreter/compiler.h b/src/coreclr/interpreter/compiler.h index 31af3123979b0f..b5b91511b3adb0 100644 --- a/src/coreclr/interpreter/compiler.h +++ b/src/coreclr/interpreter/compiler.h @@ -272,6 +272,10 @@ class InterpCompiler void EmitConv(StackInfo *sp, InterpInst *prevIns, StackType type, InterpOpcode convOp); void EmitLoadVar(int var); void EmitStoreVar(int var); + void EmitBinaryArithmeticOp(int32_t opBase); + void EmitUnaryArithmeticOp(int32_t opBase); + void EmitShiftOp(int32_t opBase); + void EmitCompareOp(int32_t opBase); // Passes int32_t* m_pMethodCode; diff --git a/src/coreclr/interpreter/intops.def b/src/coreclr/interpreter/intops.def index 08488c6e7eba9c..a7f2498a6c0265 100644 --- a/src/coreclr/interpreter/intops.def +++ b/src/coreclr/interpreter/intops.def @@ -71,6 +71,16 @@ OPDEF(INTOP_BLT_UN_I8, "blt.un.i8", 4, 0, 2, InterpOpBranch) OPDEF(INTOP_BLT_UN_R4, "blt.un.r4", 4, 0, 2, InterpOpBranch) OPDEF(INTOP_BLT_UN_R8, "blt.un.r8", 4, 0, 2, InterpOpBranch) +// Unary operations + +OPDEF(INTOP_NEG_I4, "neg.i4", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_NEG_I8, "neg.i8", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_NEG_R4, "neg.r4", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_NEG_R8, "neg.r8", 3, 1, 1, InterpOpNoArgs) + +OPDEF(INTOP_NOT_I4, "not.i4", 3, 1, 1, InterpOpNoArgs) +OPDEF(INTOP_NOT_I8, "not.i8", 3, 1, 1, 
InterpOpNoArgs) + OPDEF(INTOP_CONV_R_UN_I4, "conv.r.un.i4", 3, 1, 1, InterpOpNoArgs) OPDEF(INTOP_CONV_R_UN_I8, "conv.r.un.i8", 3, 1, 1, InterpOpNoArgs) @@ -115,3 +125,63 @@ OPDEF(INTOP_CONV_R8_R4, "conv.r8.r4", 3, 1, 1, InterpOpNoArgs) OPDEF(INTOP_CONV_U8_R4, "conv.u8.r4", 3, 1, 1, InterpOpNoArgs) OPDEF(INTOP_CONV_U8_R8, "conv.u8.r8", 3, 1, 1, InterpOpNoArgs) +// Unary operations end + +// Binary operations + +OPDEF(INTOP_ADD_I4, "add.i4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_ADD_I8, "add.i8", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_ADD_R4, "add.r4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_ADD_R8, "add.r8", 4, 1, 2, InterpOpNoArgs) + +OPDEF(INTOP_SUB_I4, "sub.i4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_SUB_I8, "sub.i8", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_SUB_R4, "sub.r4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_SUB_R8, "sub.r8", 4, 1, 2, InterpOpNoArgs) + +OPDEF(INTOP_MUL_I4, "mul.i4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_MUL_I8, "mul.i8", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_MUL_R4, "mul.r4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_MUL_R8, "mul.r8", 4, 1, 2, InterpOpNoArgs) + +OPDEF(INTOP_AND_I4, "and.i4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_AND_I8, "and.i8", 4, 1, 2, InterpOpNoArgs) + +OPDEF(INTOP_OR_I4, "or.i4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_OR_I8, "or.i8", 4, 1, 2, InterpOpNoArgs) + +OPDEF(INTOP_XOR_I4, "xor.i4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_XOR_I8, "xor.i8", 4, 1, 2, InterpOpNoArgs) + +OPDEF(INTOP_SHR_UN_I4, "shr.un.i4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_SHR_UN_I8, "shr.un.i8", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_SHL_I4, "shl.i4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_SHL_I8, "shl.i8", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_SHR_I4, "shr.i4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_SHR_I8, "shr.i8", 4, 1, 2, InterpOpNoArgs) + +OPDEF(INTOP_CEQ_I4, "ceq.i4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_CEQ_I8, "ceq.i8", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_CEQ_R4, "ceq.r4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_CEQ_R8, "ceq.r8", 4, 1, 2, 
InterpOpNoArgs) + +OPDEF(INTOP_CGT_I4, "cgt.i4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_CGT_I8, "cgt.i8", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_CGT_R4, "cgt.r4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_CGT_R8, "cgt.r8", 4, 1, 2, InterpOpNoArgs) + +OPDEF(INTOP_CGT_UN_I4, "cgt.un.i4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_CGT_UN_I8, "cgt.un.i8", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_CGT_UN_R4, "cgt.un.r4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_CGT_UN_R8, "cgt.un.r8", 4, 1, 2, InterpOpNoArgs) + +OPDEF(INTOP_CLT_I4, "clt.i4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_CLT_I8, "clt.i8", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_CLT_R4, "clt.r4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_CLT_R8, "clt.r8", 4, 1, 2, InterpOpNoArgs) + +OPDEF(INTOP_CLT_UN_I4, "clt.un.i4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_CLT_UN_I8, "clt.un.i8", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_CLT_UN_R4, "clt.un.r4", 4, 1, 2, InterpOpNoArgs) +OPDEF(INTOP_CLT_UN_R8, "clt.un.r8", 4, 1, 2, InterpOpNoArgs) +// Binary operations end diff --git a/src/coreclr/vm/interpexec.cpp b/src/coreclr/vm/interpexec.cpp index 65c1651f17df76..56661f56308b16 100644 --- a/src/coreclr/vm/interpexec.cpp +++ b/src/coreclr/vm/interpexec.cpp @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. 
#include "interpexec.h" +#include #ifdef FEATURE_INTERPRETER @@ -166,7 +167,7 @@ void InterpExecMethod(InterpMethodContextFrame *pFrame, InterpThreadContext *pTh ip += 3; break; case INTOP_CONV_I8_U4: - LOCAL_VAR(ip[1], int64_t) = (uint32_t)LOCAL_VAR (ip[2], int32_t); + LOCAL_VAR(ip[1], int64_t) = (uint32_t)LOCAL_VAR(ip[2], int32_t); ip += 3; break;; case INTOP_CONV_I8_R4: @@ -358,6 +359,217 @@ void InterpExecMethod(InterpMethodContextFrame *pFrame, InterpThreadContext *pTh assert(0); break; + case INTOP_ADD_I4: + LOCAL_VAR(ip[1], int32_t) = LOCAL_VAR(ip[2], int32_t) + LOCAL_VAR(ip[3], int32_t); + ip += 4; + break; + case INTOP_ADD_I8: + LOCAL_VAR(ip[1], int64_t) = LOCAL_VAR(ip[2], int64_t) + LOCAL_VAR(ip[3], int64_t); + ip += 4; + break; + case INTOP_ADD_R4: + LOCAL_VAR(ip[1], float) = LOCAL_VAR(ip[2], float) + LOCAL_VAR(ip[3], float); + ip += 4; + break; + case INTOP_ADD_R8: + LOCAL_VAR(ip[1], double) = LOCAL_VAR(ip[2], double) + LOCAL_VAR(ip[3], double); + ip += 4; + break; + + case INTOP_SUB_I4: + LOCAL_VAR(ip[1], int32_t) = LOCAL_VAR(ip[2], int32_t) - LOCAL_VAR(ip[3], int32_t); + ip += 4; + break; + case INTOP_SUB_I8: + LOCAL_VAR(ip[1], int64_t) = LOCAL_VAR(ip[2], int64_t) - LOCAL_VAR(ip[3], int64_t); + ip += 4; + break; + case INTOP_SUB_R4: + LOCAL_VAR(ip[1], float) = LOCAL_VAR(ip[2], float) - LOCAL_VAR(ip[3], float); + ip += 4; + break; + case INTOP_SUB_R8: + LOCAL_VAR(ip[1], double) = LOCAL_VAR(ip[2], double) - LOCAL_VAR(ip[3], double); + ip += 4; + break; + + case INTOP_MUL_I4: + LOCAL_VAR(ip[1], int32_t) = LOCAL_VAR(ip[2], int32_t) * LOCAL_VAR(ip[3], int32_t); + ip += 4; + break; + case INTOP_MUL_I8: + LOCAL_VAR(ip[1], int64_t) = LOCAL_VAR(ip[2], int64_t) * LOCAL_VAR(ip[3], int64_t); + ip += 4; + break; + case INTOP_MUL_R4: + LOCAL_VAR(ip[1], float) = LOCAL_VAR(ip[2], float) * LOCAL_VAR(ip[3], float); + ip += 4; + break; + case INTOP_MUL_R8: + LOCAL_VAR(ip[1], double) = LOCAL_VAR(ip[2], double) * LOCAL_VAR(ip[3], double); + ip += 4; + break; + + case 
INTOP_SHL_I4: + LOCAL_VAR(ip[1], int32_t) = LOCAL_VAR(ip[2], int32_t) << LOCAL_VAR(ip[3], int32_t); + ip += 4; + break; + case INTOP_SHL_I8: + LOCAL_VAR(ip[1], int64_t) = LOCAL_VAR(ip[2], int64_t) << LOCAL_VAR(ip[3], int32_t); + ip += 4; + break; + case INTOP_SHR_I4: + LOCAL_VAR(ip[1], int32_t) = LOCAL_VAR(ip[2], int32_t) >> LOCAL_VAR(ip[3], int32_t); + ip += 4; + break; + case INTOP_SHR_I8: + LOCAL_VAR(ip[1], int64_t) = LOCAL_VAR(ip[2], int64_t) >> LOCAL_VAR(ip[3], int32_t); + ip += 4; + break; + case INTOP_SHR_UN_I4: + LOCAL_VAR(ip[1], uint32_t) = LOCAL_VAR(ip[2], uint32_t) >> LOCAL_VAR(ip[3], int32_t); + ip += 4; + break; + case INTOP_SHR_UN_I8: + LOCAL_VAR(ip[1], uint64_t) = LOCAL_VAR(ip[2], uint64_t) >> LOCAL_VAR(ip[3], int32_t); + ip += 4; + break; + + case INTOP_NEG_I4: + LOCAL_VAR(ip[1], int32_t) = - LOCAL_VAR(ip[2], int32_t); + ip += 3; + break; + case INTOP_NEG_I8: + LOCAL_VAR(ip[1], int64_t) = - LOCAL_VAR(ip[2], int64_t); + ip += 3; + break; + case INTOP_NEG_R4: + LOCAL_VAR(ip[1], float) = - LOCAL_VAR(ip[2], float); + ip += 3; + break; + case INTOP_NEG_R8: + LOCAL_VAR(ip[1], double) = - LOCAL_VAR(ip[2], double); + ip += 3; + break; + case INTOP_NOT_I4: + LOCAL_VAR(ip[1], int32_t) = ~ LOCAL_VAR(ip[2], int32_t); + ip += 3; + break; + case INTOP_NOT_I8: + LOCAL_VAR(ip[1], int64_t) = ~ LOCAL_VAR(ip[2], int64_t); + ip += 3; + break; + + case INTOP_AND_I4: + LOCAL_VAR(ip[1], int32_t) = LOCAL_VAR(ip[2], int32_t) & LOCAL_VAR(ip[3], int32_t); + ip += 4; + break; + case INTOP_AND_I8: + LOCAL_VAR(ip[1], int64_t) = LOCAL_VAR(ip[2], int64_t) & LOCAL_VAR(ip[3], int64_t); + ip += 4; + break; + case INTOP_OR_I4: + LOCAL_VAR(ip[1], int32_t) = LOCAL_VAR(ip[2], int32_t) | LOCAL_VAR(ip[3], int32_t); + ip += 4; + break; + case INTOP_OR_I8: + LOCAL_VAR(ip[1], int64_t) = LOCAL_VAR(ip[2], int64_t) | LOCAL_VAR(ip[3], int64_t); + ip += 4; + break; + case INTOP_XOR_I4: + LOCAL_VAR(ip[1], int32_t) = LOCAL_VAR(ip[2], int32_t) ^ LOCAL_VAR(ip[3], int32_t); + ip += 4; + break; + case 
INTOP_XOR_I8: + LOCAL_VAR(ip[1], int64_t) = LOCAL_VAR(ip[2], int64_t) ^ LOCAL_VAR(ip[3], int64_t); + ip += 4; + break; + +#define CMP_BINOP_FP(datatype, op, noOrderVal) \ + do { \ + datatype f1 = LOCAL_VAR(ip[2], datatype); \ + datatype f2 = LOCAL_VAR(ip[3], datatype); \ + if (isunordered(f1, f2)) \ + LOCAL_VAR(ip[1], int32_t) = noOrderVal; \ + else \ + LOCAL_VAR(ip[1], int32_t) = f1 op f2; \ + ip += 4; \ + } while (0) + + case INTOP_CEQ_I4: + LOCAL_VAR(ip[1], int32_t) = LOCAL_VAR(ip[2], int32_t) == LOCAL_VAR(ip[3], int32_t); + ip += 4; + break; + case INTOP_CEQ_I8: + LOCAL_VAR(ip[1], int32_t) = LOCAL_VAR(ip[2], int64_t) == LOCAL_VAR(ip[3], int64_t); + ip += 4; + break; + case INTOP_CEQ_R4: + CMP_BINOP_FP(float, ==, 0); + break; + case INTOP_CEQ_R8: + CMP_BINOP_FP(double, ==, 0); + break; + + case INTOP_CGT_I4: + LOCAL_VAR(ip[1], int32_t) = LOCAL_VAR(ip[2], int32_t) > LOCAL_VAR(ip[3], int32_t); + ip += 4; + break; + case INTOP_CGT_I8: + LOCAL_VAR(ip[1], int32_t) = LOCAL_VAR(ip[2], int64_t) > LOCAL_VAR(ip[3], int64_t); + ip += 4; + break; + case INTOP_CGT_R4: + CMP_BINOP_FP(float, >, 0); + break; + case INTOP_CGT_R8: + CMP_BINOP_FP(double, >, 0); + break; + + case INTOP_CGT_UN_I4: + LOCAL_VAR(ip[1], int32_t) = LOCAL_VAR(ip[2], uint32_t) > LOCAL_VAR(ip[3], uint32_t); + ip += 4; + break; + case INTOP_CGT_UN_I8: + LOCAL_VAR(ip[1], int32_t) = LOCAL_VAR(ip[2], uint64_t) > LOCAL_VAR(ip[3], uint64_t); + ip += 4; + break; + case INTOP_CGT_UN_R4: + CMP_BINOP_FP(float, >, 1); + break; + case INTOP_CGT_UN_R8: + CMP_BINOP_FP(double, >, 1); + break; + + case INTOP_CLT_I4: + LOCAL_VAR(ip[1], int32_t) = LOCAL_VAR(ip[2], int32_t) < LOCAL_VAR(ip[3], int32_t); + ip += 4; + break; + case INTOP_CLT_I8: + LOCAL_VAR(ip[1], int32_t) = LOCAL_VAR(ip[2], int64_t) < LOCAL_VAR(ip[3], int64_t); + ip += 4; + break; + case INTOP_CLT_R4: + CMP_BINOP_FP(float, <, 0); + break; + case INTOP_CLT_R8: + CMP_BINOP_FP(double, <, 0); + break; + + case INTOP_CLT_UN_I4: + LOCAL_VAR(ip[1], int32_t) = 
LOCAL_VAR(ip[2], uint32_t) < LOCAL_VAR(ip[3], uint32_t); + ip += 4; + break; + case INTOP_CLT_UN_I8: + LOCAL_VAR(ip[1], int32_t) = LOCAL_VAR(ip[2], uint64_t) < LOCAL_VAR(ip[3], uint64_t); + ip += 4; + break; + case INTOP_CLT_UN_R4: + CMP_BINOP_FP(float, <, 1); + break; + case INTOP_CLT_UN_R8: + CMP_BINOP_FP(double, <, 1); + break; default: assert(0); break; From 9304c5c39d44314c4abbd093bcf366468eef370c Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Wed, 26 Feb 2025 00:17:47 +0200 Subject: [PATCH 09/16] branch floating point comparisons squash into branches --- src/coreclr/vm/interpexec.cpp | 121 ++++++++++++++++++++++++++++++---- 1 file changed, 110 insertions(+), 11 deletions(-) diff --git a/src/coreclr/vm/interpexec.cpp b/src/coreclr/vm/interpexec.cpp index 56661f56308b16..e7abcc48b8f266 100644 --- a/src/coreclr/vm/interpexec.cpp +++ b/src/coreclr/vm/interpexec.cpp @@ -264,10 +264,19 @@ void InterpExecMethod(InterpMethodContextFrame *pFrame, InterpThreadContext *pTh BR_BINOP(int64_t, ==); break; case INTOP_BEQ_R4: + { + float f1 = LOCAL_VAR(ip[1], float); + float f2 = LOCAL_VAR(ip[2], float); + BR_BINOP_COND(!isunordered(f1, f2) && f1 == f2); + break; + } case INTOP_BEQ_R8: - // TODO Floating point comparisons - assert(0); + { + double d1 = LOCAL_VAR(ip[1], double); + double d2 = LOCAL_VAR(ip[2], double); + BR_BINOP_COND(!isunordered(d1, d2) && d1 == d2); break; + } case INTOP_BGE_I4: BR_BINOP(int32_t, >=); break; @@ -275,9 +284,19 @@ void InterpExecMethod(InterpMethodContextFrame *pFrame, InterpThreadContext *pTh BR_BINOP(int64_t, >=); break; case INTOP_BGE_R4: + { + float f1 = LOCAL_VAR(ip[1], float); + float f2 = LOCAL_VAR(ip[2], float); + BR_BINOP_COND(!isunordered(f1, f2) && f1 >= f2); + break; + } case INTOP_BGE_R8: - assert(0); + { + double d1 = LOCAL_VAR(ip[1], double); + double d2 = LOCAL_VAR(ip[2], double); + BR_BINOP_COND(!isunordered(d1, d2) && d1 >= d2); break; + } case INTOP_BGT_I4: BR_BINOP(int32_t, >); break; @@ -285,9 +304,19 @@ void 
InterpExecMethod(InterpMethodContextFrame *pFrame, InterpThreadContext *pTh BR_BINOP(int64_t, >); break; case INTOP_BGT_R4: + { + float f1 = LOCAL_VAR(ip[1], float); + float f2 = LOCAL_VAR(ip[2], float); + BR_BINOP_COND(!isunordered(f1, f2) && f1 > f2); + break; + } case INTOP_BGT_R8: - assert(0); + { + double d1 = LOCAL_VAR(ip[1], double); + double d2 = LOCAL_VAR(ip[2], double); + BR_BINOP_COND(!isunordered(d1, d2) && d1 > d2); break; + } case INTOP_BLT_I4: BR_BINOP(int32_t, <); break; @@ -295,9 +324,19 @@ void InterpExecMethod(InterpMethodContextFrame *pFrame, InterpThreadContext *pTh BR_BINOP(int64_t, <); break; case INTOP_BLT_R4: + { + float f1 = LOCAL_VAR(ip[1], float); + float f2 = LOCAL_VAR(ip[2], float); + BR_BINOP_COND(!isunordered(f1, f2) && f1 < f2); + break; + } case INTOP_BLT_R8: - assert(0); + { + double d1 = LOCAL_VAR(ip[1], double); + double d2 = LOCAL_VAR(ip[2], double); + BR_BINOP_COND(!isunordered(d1, d2) && d1 < d2); break; + } case INTOP_BLE_I4: BR_BINOP(int32_t, <=); break; @@ -305,9 +344,19 @@ void InterpExecMethod(InterpMethodContextFrame *pFrame, InterpThreadContext *pTh BR_BINOP(int64_t, <=); break; case INTOP_BLE_R4: + { + float f1 = LOCAL_VAR(ip[1], float); + float f2 = LOCAL_VAR(ip[2], float); + BR_BINOP_COND(!isunordered(f1, f2) && f1 <= f2); + break; + } case INTOP_BLE_R8: - assert(0); + { + double d1 = LOCAL_VAR(ip[1], double); + double d2 = LOCAL_VAR(ip[2], double); + BR_BINOP_COND(!isunordered(d1, d2) && d1 <= d2); break; + } case INTOP_BNE_UN_I4: BR_BINOP(uint32_t, !=); break; @@ -315,9 +364,19 @@ void InterpExecMethod(InterpMethodContextFrame *pFrame, InterpThreadContext *pTh BR_BINOP(uint64_t, !=); break; case INTOP_BNE_UN_R4: + { + float f1 = LOCAL_VAR(ip[1], float); + float f2 = LOCAL_VAR(ip[2], float); + BR_BINOP_COND(isunordered(f1, f2) || f1 != f2); + break; + } case INTOP_BNE_UN_R8: - assert(0); + { + double d1 = LOCAL_VAR(ip[1], double); + double d2 = LOCAL_VAR(ip[2], double); + BR_BINOP_COND(isunordered(d1, d2) || d1 != 
d2); break; + } case INTOP_BGE_UN_I4: BR_BINOP(uint32_t, >=); break; @@ -325,9 +384,19 @@ void InterpExecMethod(InterpMethodContextFrame *pFrame, InterpThreadContext *pTh BR_BINOP(uint64_t, >=); break; case INTOP_BGE_UN_R4: + { + float f1 = LOCAL_VAR(ip[1], float); + float f2 = LOCAL_VAR(ip[2], float); + BR_BINOP_COND(isunordered(f1, f2) || f1 >= f2); + break; + } case INTOP_BGE_UN_R8: - assert(0); + { + double d1 = LOCAL_VAR(ip[1], double); + double d2 = LOCAL_VAR(ip[2], double); + BR_BINOP_COND(isunordered(d1, d2) || d1 >= d2); break; + } case INTOP_BGT_UN_I4: BR_BINOP(uint32_t, >); break; @@ -335,9 +404,19 @@ void InterpExecMethod(InterpMethodContextFrame *pFrame, InterpThreadContext *pTh BR_BINOP(uint64_t, >); break; case INTOP_BGT_UN_R4: + { + float f1 = LOCAL_VAR(ip[1], float); + float f2 = LOCAL_VAR(ip[2], float); + BR_BINOP_COND(isunordered(f1, f2) || f1 > f2); + break; + } case INTOP_BGT_UN_R8: - assert(0); + { + double d1 = LOCAL_VAR(ip[1], double); + double d2 = LOCAL_VAR(ip[2], double); + BR_BINOP_COND(isunordered(d1, d2) || d1 > d2); break; + } case INTOP_BLE_UN_I4: BR_BINOP(uint32_t, <=); break; @@ -345,9 +424,19 @@ void InterpExecMethod(InterpMethodContextFrame *pFrame, InterpThreadContext *pTh BR_BINOP(uint64_t, <=); break; case INTOP_BLE_UN_R4: + { + float f1 = LOCAL_VAR(ip[1], float); + float f2 = LOCAL_VAR(ip[2], float); + BR_BINOP_COND(isunordered(f1, f2) || f1 <= f2); + break; + } case INTOP_BLE_UN_R8: - assert(0); + { + double d1 = LOCAL_VAR(ip[1], double); + double d2 = LOCAL_VAR(ip[2], double); + BR_BINOP_COND(isunordered(d1, d2) || d1 <= d2); break; + } case INTOP_BLT_UN_I4: BR_BINOP(uint32_t, <); break; @@ -355,9 +444,19 @@ void InterpExecMethod(InterpMethodContextFrame *pFrame, InterpThreadContext *pTh BR_BINOP(uint64_t, <); break; case INTOP_BLT_UN_R4: + { + float f1 = LOCAL_VAR(ip[1], float); + float f2 = LOCAL_VAR(ip[2], float); + BR_BINOP_COND(isunordered(f1, f2) || f1 < f2); + break; + } case INTOP_BLT_UN_R8: - assert(0); + { + 
double d1 = LOCAL_VAR(ip[1], double); + double d2 = LOCAL_VAR(ip[2], double); + BR_BINOP_COND(isunordered(d1, d2) || d1 < d2); break; + } case INTOP_ADD_I4: LOCAL_VAR(ip[1], int32_t) = LOCAL_VAR(ip[2], int32_t) + LOCAL_VAR(ip[3], int32_t); From 07d91c5b3c206ccc23a70ff362da2615f5729e2d Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Fri, 28 Feb 2025 10:32:04 +0200 Subject: [PATCH 10/16] Add support for direct calls Unlike other instructions, INTOP_CALL doesn't have a fixed number of sVars, since the method to be called can have any signature. Instead it will hold a -1 terminated array of sVars. The arguments will be placed one after the other, at an offset referred to as callArgsOffset, representing the only source offset for the call instruction. Vars that are sources for a call will be allocated in this sequential position by the var offset allocator, which is not yet included as part of this commit. INTOP_CALL instruction receives an additional fixed argument, which is a tagged `MethodDesc*`. Rather than embedding the pointer into the instruction stream, this is passed as an index into a table. This will both reduce code memory use, since identical data will be stored at the same index, as well as provide us with an unique slot to atomically patch the data. During first execution of a call, we would observe that the data item is a tagged pointer, suggesting this is a MethodDesc*, in which case we try to obtain the actual method code, triggering method compilation if necessary. Once this is done, the MethodDesc pointer will be replaced with the actual code pointer. In the future, this pointer can be either interpreter IR or jit compiled code. Given we currently only have support to specify a single method to be interpreted, in order to expand on this, this commit introduces a temporary hack behavior where once we enter the interpreter, we don't exit it for methods from the same module. 
In the future we might want to switch this to passing an env var specifying which assembly to be interpreted and which to be jitted. --- src/coreclr/interpreter/compiler.cpp | 120 +++++++++++++++++++- src/coreclr/interpreter/compiler.h | 12 ++ src/coreclr/interpreter/datastructs.h | 14 ++- src/coreclr/interpreter/eeinterp.cpp | 29 ++++- src/coreclr/interpreter/interpretershared.h | 6 +- src/coreclr/interpreter/intops.def | 3 + src/coreclr/interpreter/intops.h | 1 + src/coreclr/vm/interpexec.cpp | 70 ++++++++++++ src/coreclr/vm/interpexec.h | 14 ++- 9 files changed, 257 insertions(+), 12 deletions(-) diff --git a/src/coreclr/interpreter/compiler.cpp b/src/coreclr/interpreter/compiler.cpp index a73ecfee4b9c82..d43af2db73e9e2 100644 --- a/src/coreclr/interpreter/compiler.cpp +++ b/src/coreclr/interpreter/compiler.cpp @@ -682,7 +682,13 @@ void InterpCompiler::EmitCode() InterpMethod* InterpCompiler::CreateInterpMethod() { - InterpMethod *pMethod = new InterpMethod(m_methodHnd, m_totalVarsStackSize); + int numDataItems = m_dataItems.GetSize(); + void **pDataItems = (void**)AllocMethodData(numDataItems * sizeof(void*)); + + for (int i = 0; i < numDataItems; i++) + pDataItems[i] = m_dataItems.Get(i); + + InterpMethod *pMethod = new InterpMethod(m_methodHnd, m_totalVarsStackSize, pDataItems); return pMethod; } @@ -697,6 +703,7 @@ InterpCompiler::InterpCompiler(COMP_HANDLE compHnd, CORINFO_METHOD_INFO* methodInfo) { m_methodHnd = methodInfo->ftn; + m_compScopeHnd = methodInfo->scope; m_compHnd = compHnd; m_methodInfo = methodInfo; } @@ -1291,8 +1298,91 @@ void InterpCompiler::EmitCompareOp(int32_t opBase) m_pLastIns->SetDVar(m_pStackPointer[-1].var); } +int32_t InterpCompiler::GetDataItemIndex(void *data) +{ + int32_t index = m_dataItems.Find(data); + if (index != -1) + return index; + + return m_dataItems.Add(data); +} + +int32_t InterpCompiler::GetMethodDataItemIndex(CORINFO_METHOD_HANDLE mHandle) +{ + size_t data = (size_t)mHandle | INTERP_METHOD_DESC_TAG; + return 
GetDataItemIndex((void*)data); +} + +void InterpCompiler::EmitCall(CORINFO_CLASS_HANDLE constrainedClass, bool readonly, bool tailcall) +{ + uint32_t token = getU4LittleEndian(m_ip + 1); + CORINFO_RESOLVED_TOKEN resolvedToken; + + resolvedToken.tokenScope = m_compScopeHnd; + resolvedToken.tokenContext = METHOD_BEING_COMPILED_CONTEXT(); + resolvedToken.token = token; + resolvedToken.tokenType = CORINFO_TOKENKIND_Method; + m_compHnd->resolveToken(&resolvedToken); + + CORINFO_METHOD_HANDLE targetMethod = resolvedToken.hMethod; + + CORINFO_SIG_INFO targetSignature; + m_compHnd->getMethodSig(targetMethod, &targetSignature); + + // Process sVars + int numArgs = targetSignature.numArgs + targetSignature.hasThis(); + m_pStackPointer -= numArgs; + + int *callArgs = (int*) AllocMemPool((numArgs + 1) * sizeof(int)); + for (int i = 0; i < numArgs; i++) + callArgs[i] = m_pStackPointer [i].var; + callArgs[numArgs] = -1; + + // Process dVar + int32_t dVar; + if (targetSignature.retType != CORINFO_TYPE_VOID) + { + InterpType interpType = GetInterpType(targetSignature.retType); + + if (interpType == InterpTypeVT) + { + int32_t size = m_compHnd->getClassSize(targetSignature.retTypeClass); + PushTypeVT(targetSignature.retTypeClass, size); + } + else + { + PushInterpType(interpType, NULL); + } + dVar = m_pStackPointer[-1].var; + } + else + { + // Create a new dummy var to serve as the dVar of the call + // FIXME Consider adding special dVar type (ex -1), that is + // resolved to null offset. 
The opcode shouldn't really write to it + PushStackType(StackTypeI4, NULL); + m_pStackPointer--; + dVar = m_pStackPointer[0].var; + } + + // Emit call instruction + AddIns(INTOP_CALL); + m_pLastIns->SetDVar(dVar); + m_pLastIns->SetSVar(CALL_ARGS_SVAR); + m_pLastIns->data[0] = GetMethodDataItemIndex(targetMethod); + + m_pLastIns->flags |= INTERP_INST_FLAG_CALL; + m_pLastIns->info.pCallInfo = (InterpCallInfo*)AllocMemPool0(sizeof (InterpCallInfo)); + m_pLastIns->info.pCallInfo->pCallArgs = callArgs; + + m_ip += 5; +} + int InterpCompiler::GenerateCode(CORINFO_METHOD_INFO* methodInfo) { + bool readonly = false; + bool tailcall = false; + CORINFO_CLASS_HANDLE constrainedClass = NULL; uint8_t *codeEnd; int numArgs = m_methodInfo->args.hasThis() + m_methodInfo->args.numArgs; bool emittedBBlocks, linkBBlocks, needsRetryEmit; @@ -2019,6 +2109,12 @@ int InterpCompiler::GenerateCode(CORINFO_METHOD_INFO* methodInfo) EmitUnaryArithmeticOp(INTOP_NOT_I4); m_ip++; break; + case CEE_CALL: + EmitCall(constrainedClass, readonly, tailcall); + constrainedClass = NULL; + readonly = false; + tailcall = false; + break; case CEE_PREFIX1: m_ip++; @@ -2060,6 +2156,28 @@ int InterpCompiler::GenerateCode(CORINFO_METHOD_INFO* methodInfo) EmitCompareOp(INTOP_CLT_UN_I4); m_ip++; break; + case CEE_CONSTRAINED: + { + uint32_t token = getU4LittleEndian(m_ip + 1); + CORINFO_RESOLVED_TOKEN resolvedToken; + + resolvedToken.tokenScope = m_compScopeHnd; + resolvedToken.tokenContext = METHOD_BEING_COMPILED_CONTEXT(); + resolvedToken.token = token; + resolvedToken.tokenType = CORINFO_TOKENKIND_Constrained; + m_compHnd->resolveToken(&resolvedToken); + constrainedClass = resolvedToken.hClass; + m_ip += 5; + break; + } + case CEE_READONLY: + readonly = true; + m_ip++; + break; + case CEE_TAILCALL: + tailcall = true; + m_ip++; + break; default: assert(0); break; diff --git a/src/coreclr/interpreter/compiler.h b/src/coreclr/interpreter/compiler.h index b5b91511b3adb0..c3c223aea7ca29 100644 --- 
a/src/coreclr/interpreter/compiler.h +++ b/src/coreclr/interpreter/compiler.h @@ -58,6 +58,11 @@ struct InterpCallInfo struct InterpBasicBlock; +enum InterpInstFlags +{ + INTERP_INST_FLAG_CALL = 0x01, +}; + struct InterpInst { InterpInst *pNext, *pPrev; @@ -186,6 +191,7 @@ class InterpCompiler { private: CORINFO_METHOD_HANDLE m_methodHnd; + CORINFO_MODULE_HANDLE m_compScopeHnd; COMP_HANDLE m_compHnd; CORINFO_METHOD_INFO* m_methodInfo; @@ -195,6 +201,11 @@ class InterpCompiler uint8_t* m_pILCode; int32_t m_ILCodeSize; + // FIXME during compilation this should be a hashtable for fast lookup of duplicates + PtrArray m_dataItems; + int32_t GetDataItemIndex(void* data); + int32_t GetMethodDataItemIndex(CORINFO_METHOD_HANDLE mHandle); + int GenerateCode(CORINFO_METHOD_INFO* methodInfo); void* AllocMethodData(size_t numBytes); @@ -276,6 +287,7 @@ class InterpCompiler void EmitUnaryArithmeticOp(int32_t opBase); void EmitShiftOp(int32_t opBase); void EmitCompareOp(int32_t opBase); + void EmitCall(CORINFO_CLASS_HANDLE constrainedClass, bool readonly, bool tailcall); // Passes int32_t* m_pMethodCode; diff --git a/src/coreclr/interpreter/datastructs.h b/src/coreclr/interpreter/datastructs.h index 9d18997e792ea2..bcace3cfea4afa 100644 --- a/src/coreclr/interpreter/datastructs.h +++ b/src/coreclr/interpreter/datastructs.h @@ -39,12 +39,12 @@ class PtrArray return m_size; } - void Add(T element) + int32_t Add(T element) { if (m_size == m_capacity) Grow(); m_array[m_size] = element; - m_size++; + return m_size++; } T Get(int32_t index) @@ -52,6 +52,16 @@ class PtrArray assert(index < m_size); return m_array[index]; } + + int32_t Find(T element) + { + for (int i = 0; i < m_size; i++) + { + if (element == m_array[i]) + return i; + } + return -1; + } }; #endif diff --git a/src/coreclr/interpreter/eeinterp.cpp b/src/coreclr/interpreter/eeinterp.cpp index 255890e86deac5..9f154ba9ba5b97 100644 --- a/src/coreclr/interpreter/eeinterp.cpp +++ b/src/coreclr/interpreter/eeinterp.cpp @@ -40,6 
+40,9 @@ extern "C" INTERP_API ICorJitCompiler* getJit() return &g_CILInterp; } + +static CORINFO_MODULE_HANDLE g_interpModule = NULL; + //**************************************************************************** CorJitResult CILInterp::compileMethod(ICorJitInfo* compHnd, CORINFO_METHOD_INFO* methodInfo, @@ -48,13 +51,27 @@ CorJitResult CILInterp::compileMethod(ICorJitInfo* compHnd, uint32_t* nativeSizeOfCode) { - const char *methodName = compHnd->getMethodNameFromMetadata(methodInfo->ftn, nullptr, nullptr, nullptr, 0); + bool doInterpret; - // TODO: replace this by something like the JIT does to support multiple methods being specified and we don't - // keep fetching it on each call to compileMethod - const char *methodToInterpret = g_interpHost->getStringConfigValue("AltJit"); - bool doInterpret = (methodName != NULL && strcmp(methodName, methodToInterpret) == 0); - g_interpHost->freeStringConfigValue(methodToInterpret); + if (g_interpModule != NULL) + { + if (methodInfo->scope == g_interpModule) + doInterpret = true; + else + doInterpret = false; + } + else + { + const char *methodName = compHnd->getMethodNameFromMetadata(methodInfo->ftn, nullptr, nullptr, nullptr, 0); + + // TODO: replace this by something like the JIT does to support multiple methods being specified and we don't + // keep fetching it on each call to compileMethod + const char *methodToInterpret = g_interpHost->getStringConfigValue("AltJit"); + doInterpret = (methodName != NULL && strcmp(methodName, methodToInterpret) == 0); + g_interpHost->freeStringConfigValue(methodToInterpret); + if (doInterpret) + g_interpModule = methodInfo->scope; + } if (!doInterpret) { diff --git a/src/coreclr/interpreter/interpretershared.h b/src/coreclr/interpreter/interpretershared.h index 8982dc0c201deb..5e8928b840bafd 100644 --- a/src/coreclr/interpreter/interpretershared.h +++ b/src/coreclr/interpreter/interpretershared.h @@ -11,15 +11,19 @@ #define INTERP_STACK_SLOT_SIZE 8 // Alignment of each var offset on 
the interpreter stack #define INTERP_STACK_ALIGNMENT 16 // Alignment of interpreter stack at the start of a frame +#define INTERP_METHOD_DESC_TAG 4 // Tag of a MethodDesc in the interp method dataItems + struct InterpMethod { CORINFO_METHOD_HANDLE methodHnd; int32_t allocaSize; + void** pDataItems; - InterpMethod(CORINFO_METHOD_HANDLE methodHnd, int32_t allocaSize) + InterpMethod(CORINFO_METHOD_HANDLE methodHnd, int32_t allocaSize, void** pDataItems) { this->methodHnd = methodHnd; this->allocaSize = allocaSize; + this->pDataItems = pDataItems; } }; diff --git a/src/coreclr/interpreter/intops.def b/src/coreclr/interpreter/intops.def index a7f2498a6c0265..37e8e5ce57e2ee 100644 --- a/src/coreclr/interpreter/intops.def +++ b/src/coreclr/interpreter/intops.def @@ -185,3 +185,6 @@ OPDEF(INTOP_CLT_UN_I8, "clt.un.i8", 4, 1, 2, InterpOpNoArgs) OPDEF(INTOP_CLT_UN_R4, "clt.un.r4", 4, 1, 2, InterpOpNoArgs) OPDEF(INTOP_CLT_UN_R8, "clt.un.r8", 4, 1, 2, InterpOpNoArgs) // Binary operations end + +// Calls +OPDEF(INTOP_CALL, "call", 4, 1, 1, InterpOpMethodToken) diff --git a/src/coreclr/interpreter/intops.h b/src/coreclr/interpreter/intops.h index ad462c09761713..2c37cd6f3b62aa 100644 --- a/src/coreclr/interpreter/intops.h +++ b/src/coreclr/interpreter/intops.h @@ -15,6 +15,7 @@ typedef enum InterpOpInt, InterpOpBranch, InterpOpSwitch, + InterpOpMethodToken, } InterpOpArgType; extern const uint8_t g_interpOpLen[]; diff --git a/src/coreclr/vm/interpexec.cpp b/src/coreclr/vm/interpexec.cpp index e7abcc48b8f266..271ade7a8bf843 100644 --- a/src/coreclr/vm/interpexec.cpp +++ b/src/coreclr/vm/interpexec.cpp @@ -41,6 +41,9 @@ void InterpExecMethod(InterpMethodContextFrame *pFrame, InterpThreadContext *pTh ip = pFrame->startIp + sizeof(InterpMethod*) / sizeof(int32_t); stack = pFrame->pStack; + int32_t returnOffset, callArgsOffset; + +MAIN_LOOP: while (true) { switch (*ip) @@ -669,6 +672,60 @@ void InterpExecMethod(InterpMethodContextFrame *pFrame, InterpThreadContext *pTh case 
INTOP_CLT_UN_R8: CMP_BINOP_FP(double, <, 1); break; + + case INTOP_CALL: + { + size_t targetMethod = (size_t)pMethod->pDataItems[ip[3]]; + returnOffset = ip[1]; + callArgsOffset = ip[2]; + const int32_t *targetIp; + + if (targetMethod & INTERP_METHOD_DESC_TAG) + { + // First execution of this call. Ensure target method is compiled and + // patch the data item slot with the actual method code. + MethodDesc *pMD = (MethodDesc*)(targetMethod & ~INTERP_METHOD_DESC_TAG); + PCODE code = pMD->GetNativeCode(); + if (!code) { + pMD->PrepareInitialCode(CallerGCMode::Coop); + code = pMD->GetNativeCode(); + } + pMethod->pDataItems[ip[3]] = (void*)code; + targetIp = (const int32_t*)code; + } + else + { + // At this stage in the implementation, we assume this is pointer to + // interpreter code. In the future, this should probably be tagged pointer + // for interpreter call or normal pointer for JIT/R2R call. + targetIp = (const int32_t*)targetMethod; + } + + // Save current execution state once we return from called method + pFrame->ip = ip + 4; + + // Allocate child frame. + { + InterpMethodContextFrame *pChildFrame = pFrame->pNextFree; + if (!pChildFrame) + { + pChildFrame = (InterpMethodContextFrame*)alloca(sizeof(InterpMethodContextFrame)); + pChildFrame->pNextFree = NULL; + // Not free currently, but will be when allocation attempted. 
+ pFrame->pNextFree = pChildFrame; + } + pChildFrame->ReInit(pFrame, targetIp, stack + returnOffset, stack + callArgsOffset); + pFrame = pChildFrame; + } + assert (((size_t)pFrame->pStack % INTERP_STACK_ALIGNMENT) == 0); + + // Set execution state for the new frame + pMethod = *(InterpMethod**)pFrame->startIp; + stack = pFrame->pStack; + ip = pFrame->startIp + sizeof(InterpMethod*) / sizeof(int32_t); + pThreadContext->pStackPointer = stack + pMethod->allocaSize; + break; + } default: assert(0); break; @@ -676,6 +733,19 @@ void InterpExecMethod(InterpMethodContextFrame *pFrame, InterpThreadContext *pTh } EXIT_FRAME: + if (pFrame->pParent && pFrame->pParent->ip) + { + // Return to the main loop after a non-recursive interpreter call + pFrame = pFrame->pParent; + ip = pFrame->ip; + stack = pFrame->pStack; + pMethod = *(InterpMethod**)pFrame->startIp; + pFrame->ip = NULL; + + pThreadContext->pStackPointer = pFrame->pStack + pMethod->allocaSize; + goto MAIN_LOOP; + } + pThreadContext->pStackPointer = pFrame->pStack; } diff --git a/src/coreclr/vm/interpexec.h b/src/coreclr/vm/interpexec.h index 52bc9d234834c6..f7fe2bddb74c40 100644 --- a/src/coreclr/vm/interpexec.h +++ b/src/coreclr/vm/interpexec.h @@ -24,10 +24,20 @@ struct StackVal struct InterpMethodContextFrame { InterpMethodContextFrame *pParent; - int32_t *startIp; // from start_ip we can obtain InterpMethod and MethodDesc + const int32_t *startIp; // from startIp we can obtain InterpMethod and MethodDesc int8_t *pStack; int8_t *pRetVal; - int32_t *ip; + const int32_t *ip; // This ip is updated only when execution can leave the frame + InterpMethodContextFrame *pNextFree; + + void ReInit(InterpMethodContextFrame *pParent, const int32_t *startIp, int8_t *pRetVal, int8_t *pStack) + { + this->pParent = pParent; + this->startIp = startIp; + this->pRetVal = pRetVal; + this->pStack = pStack; + this->ip = NULL; + } }; struct InterpThreadContext From 7defb20ba06afb6c963558fee3baccbd6f3474a5 Mon Sep 17 00:00:00 2001 From: 
Vlad Brezae Date: Mon, 3 Mar 2025 23:01:31 +0200 Subject: [PATCH 11/16] Add the offset allocator for vars The var offset allocator allocates for each var a positive offset. All instructions will end up referencing this offset instead of the var index. During execution, these offsets are added to the frame's stack pointer to obtain the actual location on the interpreter stack for the value. We have three logical areas on the interpreter stack where vars reside: the global vars space, the local vars space, followed at last by the param area space. In the global vars space, each global var will have an offset allocated to it, that is constant throughout method execution. This space is first filled by the argument vars (which must be at the beginning of the stack, according to the interpreter ccall convention). Then we allocate each IL local in this space. Once we finish generating code for the method, we enter the actual var offset allocator. First we detect which vars are used in multiple basic blocks; these vars will be marked as global and have a unique offset allocated. The space following the global vars will be used for allocating vars used in a single basic block. We will traverse each bblock and, as a local var is defined, we will allocate it at the current offset. We have liveStart and liveEnd computed for each local var (measured as instruction indexes in the corresponding basic block). Based on these liveness markers, we determine when to pop vars from the set of active vars (vars that are currently live). Variables that are arguments to a call will be allocated instead in the param area. Given we don't know the exact offset of the param area during bblock iteration (because it follows the local vars space, which is determined only after traversing all bblocks), these vars will be initially allocated with an offset relative to the param area. At the very end, we will offset these with the param area offset.
Vars passed to calls have to die immediately following the call. If the call arg var can't satisfy this constraint (for example the var is global, or a local var that could be referenced following the call), we will create a new temporary var that we copy the arg into, and this new var will serve as the call arg instead. --- src/coreclr/interpreter/compiler.cpp | 20 +- src/coreclr/interpreter/compiler.h | 50 +++- src/coreclr/interpreter/compileropt.cpp | 339 +++++++++++++++++++++++- src/coreclr/interpreter/datastructs.h | 67 ++++- src/coreclr/interpreter/intops.def | 2 + 5 files changed, 453 insertions(+), 25 deletions(-) diff --git a/src/coreclr/interpreter/compiler.cpp b/src/coreclr/interpreter/compiler.cpp index d43af2db73e9e2..f313178ce4fdcf 100644 --- a/src/coreclr/interpreter/compiler.cpp +++ b/src/coreclr/interpreter/compiler.cpp @@ -58,6 +58,13 @@ void* InterpCompiler::AllocTemporary(size_t numBytes) return malloc(numBytes); } +void* InterpCompiler::AllocTemporary0(size_t numBytes) +{ + void *ptr = AllocTemporary(numBytes); + memset(ptr, 0, numBytes); + return ptr; +} + void* InterpCompiler::ReallocTemporary(void* ptr, size_t numBytes) { return realloc(ptr, numBytes); @@ -456,13 +463,14 @@ int32_t InterpCompiler::CreateVarExplicit(InterpType interpType, CORINFO_CLASS_H m_pVars = (InterpVar*) ReallocTemporary(m_pVars, m_varsCapacity * sizeof(InterpVar)); } InterpVar *var = &m_pVars[m_varsSize]; + memset(var, 0, sizeof(InterpVar)); var->interpType = interpType; var->clsHnd = clsHnd; var->size = size; - var->indirects = 0; var->offset = -1; var->liveStart = -1; + var->bbIndex = -1; m_varsSize++; return m_varsSize - 1; @@ -557,7 +565,7 @@ int32_t InterpCompiler::ComputeCodeSize() return codeSize; } -int32_t* InterpCompiler::EmitCodeIns(int32_t *ip, InterpInst *ins, PtrArray *relocs) +int32_t* InterpCompiler::EmitCodeIns(int32_t *ip, InterpInst *ins, TArray *relocs) { int32_t opcode = ins->opcode; int32_t *startIp = ip; @@ -636,7 +644,7 @@ int32_t* 
InterpCompiler::EmitCodeIns(int32_t *ip, InterpInst *ins, PtrArray *relocs) +void InterpCompiler::PatchRelocations(TArray *relocs) { int32_t size = relocs->GetSize(); @@ -660,7 +668,7 @@ void InterpCompiler::PatchRelocations(PtrArray *relocs) void InterpCompiler::EmitCode() { - PtrArray relocs; + TArray relocs; int32_t codeSize = ComputeCodeSize(); m_pMethodCode = (int32_t*)AllocMethodData(codeSize * sizeof(int32_t)); @@ -815,7 +823,7 @@ void InterpCompiler::CreateILVars() // add some starting extra space for new vars m_varsCapacity = numILVars + 64; - m_pVars = (InterpVar*)AllocTemporary(m_varsCapacity * sizeof (InterpVar)); + m_pVars = (InterpVar*)AllocTemporary0(m_varsCapacity * sizeof (InterpVar)); m_varsSize = numILVars; offset = 0; @@ -844,7 +852,6 @@ void InterpCompiler::CreateILVars() m_pVars[i].clsHnd = argClass; m_pVars[i].global = true; m_pVars[i].ILGlobal = true; - m_pVars[i].indirects = 0; size = GetInterpTypeSize(argClass, interpType, &align); m_pVars[i].size = size; @@ -869,7 +876,6 @@ void InterpCompiler::CreateILVars() m_pVars[index].clsHnd = argClass; m_pVars[index].global = true; m_pVars[index].ILGlobal = true; - m_pVars[index].indirects = 0; size = GetInterpTypeSize(argClass, interpType, &align); m_pVars[index].size = size; diff --git a/src/coreclr/interpreter/compiler.h b/src/coreclr/interpreter/compiler.h index c3c223aea7ca29..61ef7b1d083351 100644 --- a/src/coreclr/interpreter/compiler.h +++ b/src/coreclr/interpreter/compiler.h @@ -47,20 +47,29 @@ enum InterpType { #endif }; +struct InterpInst; +struct InterpBasicBlock; + struct InterpCallInfo { // For call instructions, this represents an array of all call arg vars // in the order they are pushed to the stack. This makes it easy to find // all source vars for these types of opcodes. This is terminated with -1. 
- int *pCallArgs; - int callOffset; + int32_t *pCallArgs; + int32_t callOffset; + union { + // Array of call dependencies that need to be resolved before + TSList *callDeps; + // Stack end offset of call arguments + int32_t callEndOffset; + }; }; -struct InterpBasicBlock; - enum InterpInstFlags { - INTERP_INST_FLAG_CALL = 0x01, + INTERP_INST_FLAG_CALL = 0x01, + // Flag used internally by the var offset allocator + INTERP_INST_FLAG_ACTIVE_CALL = 0x02 }; struct InterpInst @@ -143,9 +152,17 @@ struct InterpVar // live_start and live_end are used by the offset allocator int liveStart; int liveEnd; - + // index of first basic block where this var is used + int bbIndex; + // If var is callArgs, this is the call instruction using it. + // Only used by the var offset allocator + InterpInst *call; + + unsigned int callArgs : 1; // Var used as argument to a call + unsigned int noCallArgs : 1; // Var can't be used as argument to a call, needs to be copied to temp unsigned int global : 1; // Dedicated stack offset throughout method execution unsigned int ILGlobal : 1; // Args and IL locals + unsigned int alive : 1; // Used internally by the var offset allocator }; struct StackInfo @@ -202,7 +219,7 @@ class InterpCompiler int32_t m_ILCodeSize; // FIXME during compilation this should be a hashtable for fast lookup of duplicates - PtrArray m_dataItems; + TArray m_dataItems; int32_t GetDataItemIndex(void* data); int32_t GetMethodDataItemIndex(CORINFO_METHOD_HANDLE mHandle); @@ -212,6 +229,7 @@ class InterpCompiler void* AllocMemPool(size_t numBytes); void* AllocMemPool0(size_t numBytes); void* AllocTemporary(size_t numBytes); + void* AllocTemporary0(size_t numBytes); void* ReallocTemporary(void* ptr, size_t numBytes); void FreeTemporary(void* ptr); @@ -289,6 +307,20 @@ class InterpCompiler void EmitCompareOp(int32_t opBase); void EmitCall(CORINFO_CLASS_HANDLE constrainedClass, bool readonly, bool tailcall); + // Var Offset allocator + TArray *m_pActiveCalls; + TArray 
*m_pActiveVars; + TSList *m_pDeferredCalls; + + int32_t AllocGlobalVarOffset(int var); + void SetVarLiveRange(int32_t var, int insIndex); + void SetVarLiveRangeCB(int32_t *pVar, void *pData); + void InitializeGlobalVar(int32_t var, int bbIndex); + void InitializeGlobalVarCB(int32_t *pVar, void *pData); + void InitializeGlobalVars(); + void EndActiveCall(InterpInst *call); + void CompactActiveVars(int32_t *current_offset); + // Passes int32_t* m_pMethodCode; int32_t m_MethodCodeSize; // in int32_t @@ -296,8 +328,8 @@ class InterpCompiler void AllocOffsets(); int32_t ComputeCodeSize(); void EmitCode(); - int32_t* EmitCodeIns(int32_t *ip, InterpInst *pIns, PtrArray *relocs); - void PatchRelocations(PtrArray *relocs); + int32_t* EmitCodeIns(int32_t *ip, InterpInst *pIns, TArray *relocs); + void PatchRelocations(TArray *relocs); InterpMethod* CreateInterpMethod(); bool CreateBasicBlocks(CORINFO_METHOD_INFO* methodInfo); public: diff --git a/src/coreclr/interpreter/compileropt.cpp b/src/coreclr/interpreter/compileropt.cpp index 9e1f83ac34a1ed..c1b7a35f3f6b63 100644 --- a/src/coreclr/interpreter/compileropt.cpp +++ b/src/coreclr/interpreter/compileropt.cpp @@ -18,23 +18,350 @@ int32_t InterpCompiler::AllocVarOffset(int var, int32_t *pPos) return m_pVars[var].offset; } -void InterpCompiler::AllocVarOffsetCB(int *pVar, void *pData) +int32_t InterpCompiler::AllocGlobalVarOffset(int var) { - AllocVarOffset(*pVar, &m_totalVarsStackSize); + return AllocVarOffset(var, &m_totalVarsStackSize); +} + +// For a var that is local to the current bblock that we process, as we iterate +// over instructions we mark the first and last intruction using it. 
+void InterpCompiler::SetVarLiveRange(int32_t var, int insIndex) +{ + // We don't track liveness yet for global vars + if (m_pVars[var].global) + return; + if (m_pVars[var].liveStart == -1) + m_pVars[var].liveStart = insIndex; + m_pVars[var].liveEnd = insIndex; +} + +void InterpCompiler::SetVarLiveRangeCB(int32_t *pVar, void *pData) +{ + SetVarLiveRange(*pVar, (int)(size_t)pData); +} + +void InterpCompiler::InitializeGlobalVar(int32_t var, int bbIndex) +{ + // Check if already handled + if (m_pVars[var].global) + return; + + if (m_pVars[var].bbIndex == -1) + { + m_pVars[var].bbIndex = bbIndex; + } + else if (m_pVars[var].bbIndex != bbIndex) + { + AllocGlobalVarOffset(var); + m_pVars[var].global = true; + } +} + +void InterpCompiler::InitializeGlobalVarCB(int32_t *pVar, void *pData) +{ + InitializeGlobalVar(*pVar, (int)(size_t)pData); +} + +void InterpCompiler::InitializeGlobalVars() +{ + InterpBasicBlock *pBB; + for (pBB = m_pEntryBB; pBB != NULL; pBB = pBB->pNextBB) + { + InterpInst *pIns; + + for (pIns = pBB->pFirstIns; pIns != NULL; pIns = pIns->pNext) { + + int32_t opcode = pIns->opcode; + if (opcode == INTOP_NOP) + continue; + if (opcode == INTOP_LDLOCA) + { + int var = pIns->sVars[0]; + // If global flag is set, it means its offset was already allocated + if (!m_pVars[var].global) + { + AllocGlobalVarOffset(var); + m_pVars[var].global = true; + } + } + ForEachInsVar(pIns, (void*)(size_t)pBB->index, &InterpCompiler::InitializeGlobalVarCB); + } + } + m_totalVarsStackSize = ALIGN_UP_TO(m_totalVarsStackSize, INTERP_STACK_ALIGNMENT); +} + +// For each call instruction, this method computes its base offset. The base offset is computed as +// the max offset of all call offsets on which the call depends. Stack ensures that all call offsets +// on which the call depends are calculated before the call in question, by deferring calls from the +// last to the first one. 
+// +// This method allocates offsets of resolved calls following a constraint where the base offset +// of a call must be greater than the offset of any argument of other active call args. It first +// removes the call from an array of active calls. If a match is found, the call is removed from +// the array by moving the last entry into its place. Otherwise, it is a call without arguments. +// +// If there are active calls, the call in question is pushed onto the stack as a deferred call. +// The call contains a list of other active calls on which it depends. Those calls need to be +// resolved first in order to determine optimal base offset for the call in question. Otherwise, +// if there are no active calls, we resolve the call in question and deferred calls from the stack. +// +// For better understanding, consider a simple example: +// a <- _ +// b <- _ +// call1 c <- b +// d <- _ +// call2 _ <- a c d +// +// When `a` is defined, call2 becomes an active call, since `a` is part of call2 arguments. +// When `b` is defined, call1 also becomes an active call, +// When reaching call1, we attempt to resolve it. The problem with this is that call2 is already +// active, and all arguments of call1 should be placed after any arguments of call2 (in this example +// it would be enough for them to be placed after `a`, but for simplicity we place them after all +// arguments, so after `d` offset). Given call1 offset depends on call2 offset, we initialize its +// callDeps (to call2) and add call1 to the set of currently deferred calls. Call1 is no longer an +// an active call at this point. +// When reaching call2, we see we have no remaining active calls, so we will resolve its offset. +// Once the offset is resolved, we continue to resolve each remaining call from the deferred list. +// Processing call1, we iterate over each call dependency (in our case just call2) and allocate its +// offset accordingly so it doesn't overlap with any call2 args offsets. 
+void InterpCompiler::EndActiveCall(InterpInst *call) +{ + // Remove call from array + m_pActiveCalls->Remove(call); + + // Push active call that should be resolved onto the stack + if (m_pActiveCalls->GetSize()) + { + TSList *callDeps = NULL; + for (int i = 0; i < m_pActiveCalls->GetSize(); i++) + callDeps = TSList::Push(callDeps, m_pActiveCalls->Get(i)); + call->info.pCallInfo->callDeps = callDeps; + + m_pDeferredCalls = TSList::Push(m_pDeferredCalls, call); + } + else + { + call->info.pCallInfo->callDeps = NULL; + // If no other active calls, current active call and all deferred calls can be resolved from the stack + InterpInst *deferredCall = call; + while (deferredCall) { + // `base_offset` is a relative offset (to the start of the call args stack) where the args for this + // call reside. The deps for a call represent the list of active calls at the moment when the call ends. + // This means that all deps for a call end after the call in question. Given we iterate over the list + // of deferred calls from the last to the first one to end, all deps of a call are guaranteed to have + // been processed at this point. 
+ int32_t baseOffset = 0; + for (TSList *list = deferredCall->info.pCallInfo->callDeps; list; list = list->pNext) + { + int32_t endOffset = list->data->info.pCallInfo->callEndOffset; + if (endOffset > baseOffset) + baseOffset = endOffset; + } + deferredCall->info.pCallInfo->callOffset = baseOffset; + // Compute to offset of each call argument + int32_t *callArgs = deferredCall->info.pCallInfo->pCallArgs; + if (callArgs && (*callArgs != -1)) + { + int32_t var = *callArgs; + while (var != -1) + { + AllocVarOffset(var, &baseOffset); + callArgs++; + var = *callArgs; + } + } + deferredCall->info.pCallInfo->callEndOffset = ALIGN_UP_TO(baseOffset, INTERP_STACK_ALIGNMENT); + + if (m_pDeferredCalls) + { + deferredCall = m_pDeferredCalls->data; + m_pDeferredCalls = TSList::Pop(m_pDeferredCalls); + } + else + { + deferredCall = NULL; + } + } + } +} + +// Remove dead vars from the end of the active vars array and update the current offset +// to point immediately after the first found alive var. The space that used to belong +// to the now dead vars will be reused for future defined local vars in the same bblock. +void InterpCompiler::CompactActiveVars(int32_t *pCurrentOffset) +{ + int32_t size = m_pActiveVars->GetSize(); + if (!size) + return; + int32_t i = size - 1; + while (i >= 0) + { + int32_t var = m_pActiveVars->Get(i); + // If var is alive we can't compact anymore + if (m_pVars[var].alive) + return; + *pCurrentOffset = m_pVars[var].offset; + m_pActiveVars->RemoveAt(i); + i--; + } } void InterpCompiler::AllocOffsets() { - // FIXME add proper offset allocator InterpBasicBlock *pBB; + m_pActiveVars = new TArray(); + m_pActiveCalls = new TArray(); + m_pDeferredCalls = NULL; + InitializeGlobalVars(); + + int finalVarsStackSize = m_totalVarsStackSize; + + // We now have the top of stack offset. 
All local regs are allocated after this offset, with each basic block for (pBB = m_pEntryBB; pBB != NULL; pBB = pBB->pNextBB) { InterpInst *pIns; + int insIndex = 0; + + // All data structs should be left empty after a bblock iteration + assert(m_pActiveVars->GetSize() == 0); + assert(m_pActiveCalls->GetSize() == 0); + assert(m_pDeferredCalls == NULL); for (pIns = pBB->pFirstIns; pIns != NULL; pIns = pIns->pNext) - ForEachInsSVar(pIns, NULL, &InterpCompiler::AllocVarOffsetCB); + { + if (pIns->opcode == INTOP_NOP) + continue; + // TODO NewObj will be marked as noCallArgs + if (pIns->flags & INTERP_INST_FLAG_CALL) + { + if (pIns->info.pCallInfo && pIns->info.pCallInfo->pCallArgs) + { + int32_t *callArgs = pIns->info.pCallInfo->pCallArgs; + int32_t var = *callArgs; + + while (var != -1) + { + if (m_pVars[var].global || m_pVars[var].noCallArgs) + { + // Some vars can't be allocated on the call args stack, since the constraint is that + // call args vars die after the call. This isn't necessarily true for global vars or + // vars that are used by other instructions aside from the call. 
+ // We need to copy the var into a new tmp var + int newVar = CreateVarExplicit(m_pVars[var].interpType, m_pVars[var].clsHnd, m_pVars[var].size); + m_pVars[newVar].call = pIns; + m_pVars[newVar].callArgs = true; + + int32_t opcode = InterpGetMovForType(m_pVars[newVar].interpType, false); + InterpInst *newInst = InsertInsBB(pBB, pIns->pPrev, opcode); + newInst->SetDVar(newVar); + newInst->SetSVar(newVar); + if (opcode == INTOP_MOV_VT) + newInst->data[0] = m_pVars[var].size; + // The arg of the call is no longer global + *callArgs = newVar; + // Also update liveness for this instruction + ForEachInsVar(newInst, (void*)(size_t)insIndex, &InterpCompiler::SetVarLiveRangeCB); + insIndex++; + } + else + { + // Flag this var as it has special storage on the call args stack + m_pVars[var].call = pIns; + m_pVars[var].callArgs = true; + } + callArgs++; + var = *callArgs; + } + } + } + // Set liveStart and liveEnd for every referenced local that is not global + ForEachInsVar(pIns, (void*)(size_t)insIndex, &InterpCompiler::SetVarLiveRangeCB); + insIndex++; + } + int32_t currentOffset = m_totalVarsStackSize; + + insIndex = 0; + for (pIns = pBB->pFirstIns; pIns != NULL; pIns = pIns->pNext) { + int32_t opcode = pIns->opcode; + bool isCall = pIns->flags & INTERP_INST_FLAG_CALL; + + if (opcode == INTOP_NOP) + continue; + + // Expire source vars. 
We first mark them as not alive and then compact the array + for (int i = 0; i < g_interpOpSVars[opcode]; i++) + { + int32_t var = pIns->sVars[i]; + if (var == CALL_ARGS_SVAR) + continue; + if (!m_pVars[var].global && m_pVars[var].liveEnd == insIndex) + { + // Mark the var as no longer being alive + assert(!m_pVars[var].callArgs); + m_pVars[var].alive = false; + } + } + + if (isCall) + EndActiveCall(pIns); + + CompactActiveVars(&currentOffset); + + // Alloc dreg local starting at the stack_offset + if (g_interpOpDVars[opcode]) + { + int32_t var = pIns->dVar; + + if (m_pVars[var].callArgs) + { + InterpInst *call = m_pVars[var].call; + // Check if already added + if (!(call->flags & INTERP_INST_FLAG_ACTIVE_CALL)) + { + m_pActiveCalls->Add(call); + // Mark a flag on it so we don't have to lookup the array with every argument store. + call->flags |= INTERP_INST_FLAG_ACTIVE_CALL; + } + } + else if (!m_pVars[var].global && m_pVars[var].offset == -1) + { + AllocVarOffset(var, &currentOffset); + + if (currentOffset > finalVarsStackSize) + finalVarsStackSize = currentOffset; + + if (m_pVars[var].liveEnd > insIndex) + { + // If dVar is still used in the basic block, add it to the active list + m_pActiveVars->Add(var); + m_pVars[var].alive = true; + } + else + { + // Otherwise dealloc it + currentOffset = m_pVars[var].offset; + } + } + } + insIndex++; + } } - m_totalVarsStackSize = ALIGN_UP_TO(m_totalVarsStackSize, INTERP_STACK_ALIGNMENT); - m_paramAreaOffset = m_totalVarsStackSize; + finalVarsStackSize = ALIGN_UP_TO(finalVarsStackSize, INTERP_STACK_ALIGNMENT); + + // Iterate over all call args locals, update their final offset (aka add td->total_locals_size to them) + // then also update td->total_locals_size to account for this space. 
+ m_paramAreaOffset = finalVarsStackSize; + for (int32_t i = 0; i < m_varsSize; i++) + { + // These are allocated separately at the end of the stack + if (m_pVars[i].callArgs) + { + m_pVars[i].offset += m_paramAreaOffset; + int32_t topOffset = m_pVars[i].offset + m_pVars[i].size; + if (finalVarsStackSize < topOffset) + finalVarsStackSize = topOffset; + } + } + m_totalVarsStackSize = ALIGN_UP_TO(finalVarsStackSize, INTERP_STACK_ALIGNMENT); } diff --git a/src/coreclr/interpreter/datastructs.h b/src/coreclr/interpreter/datastructs.h index bcace3cfea4afa..14d7f376e8fb5a 100644 --- a/src/coreclr/interpreter/datastructs.h +++ b/src/coreclr/interpreter/datastructs.h @@ -5,7 +5,7 @@ #define _DATASTRUCTS_H_ template -class PtrArray +class TArray { private: int32_t m_size, m_capacity; @@ -21,14 +21,14 @@ class PtrArray m_array = (T*)realloc(m_array, m_capacity * sizeof(T)); } public: - PtrArray() + TArray() { m_size = 0; m_capacity = 0; m_array = NULL; } - ~PtrArray() + ~TArray() { if (m_capacity > 0) free(m_array); @@ -62,6 +62,67 @@ class PtrArray } return -1; } + + // Assumes elements are unique + void RemoveAt(int32_t index) + { + assert(index < m_size); + m_size--; + // Since this entry is removed, move the last entry into it + if (m_size > 0 && index < m_size) + m_array[index] = m_array[m_size]; + } + + // Assumes elements are unique + void Remove(T element) + { + for (int32_t i = 0; i < m_size; i++) + { + if (element == m_array[i]) + { + RemoveAt(i); + break; + } + } + } + + void Clear() + { + m_size = 0; + } +}; + +// Singly linked list, implemented as a stack +template +struct TSList +{ + T data; + TSList *pNext; + + TSList(T data, TSList *pNext) + { + this->data = data; + this->pNext = pNext; + } + + static TSList* Push(TSList *head, T data) + { + TSList *newHead = new TSList(data, head); + return newHead; + } + + static TSList* Pop(TSList *head) + { + TSList *next = head->pNext; + delete head; + return next; + } + + static void Free(TSList *head) + { + while (head 
!= NULL) + head = Pop(head); + } }; #endif diff --git a/src/coreclr/interpreter/intops.def b/src/coreclr/interpreter/intops.def index 37e8e5ce57e2ee..8f934ff50e24c8 100644 --- a/src/coreclr/interpreter/intops.def +++ b/src/coreclr/interpreter/intops.def @@ -20,6 +20,8 @@ OPDEF(INTOP_MOV_4, "mov.4", 3, 1, 1, InterpOpNoArgs) OPDEF(INTOP_MOV_8, "mov.8", 3, 1, 1, InterpOpNoArgs) OPDEF(INTOP_MOV_VT, "mov.vt", 4, 1, 1, InterpOpInt) +OPDEF(INTOP_LDLOCA, "ldloca", 3, 1, 0, InterpOpInt) + OPDEF(INTOP_SWITCH, "switch", 0, 0, 1, InterpOpSwitch) OPDEF(INTOP_BR, "br", 2, 0, 0, InterpOpBranch) From 8ae59528190cbba965d2ba4b2d5dbed5f1675b15 Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Thu, 6 Mar 2025 17:06:02 +0200 Subject: [PATCH 12/16] Add verbose compilation logging This will have to be enabled via an env var, rather than statically in code. --- src/coreclr/interpreter/compiler.cpp | 228 +++++++++++++++++++++++- src/coreclr/interpreter/compiler.h | 16 +- src/coreclr/interpreter/compileropt.cpp | 31 ++++ src/coreclr/interpreter/eeinterp.cpp | 2 +- src/coreclr/interpreter/intops.cpp | 8 +- src/coreclr/interpreter/intops.h | 2 +- 6 files changed, 278 insertions(+), 9 deletions(-) diff --git a/src/coreclr/interpreter/compiler.cpp b/src/coreclr/interpreter/compiler.cpp index f313178ce4fdcf..8e27f443f7ad05 100644 --- a/src/coreclr/interpreter/compiler.cpp +++ b/src/coreclr/interpreter/compiler.cpp @@ -29,6 +29,8 @@ static const InterpType g_interpTypeFromStackType[] = InterpTypeI, // F }; +static const char *g_stackTypeString[] = { "I4", "I8", "R4", "R8", "O ", "VT", "MP", "F " }; + // FIXME Use specific allocators for their intended purpose // Allocator for data that is kept alive throughout application execution, // being freed only if the associated method gets freed. 
@@ -107,7 +109,7 @@ InterpInst* InterpCompiler::NewIns(int opcode, int dataLen) InterpInst *ins = (InterpInst*)AllocMemPool(insSize); memset(ins, 0, insSize); ins->opcode = opcode; - ins->ilOffset = -1; + ins->ilOffset = m_currentILOffset; m_pLastIns = ins; return ins; } @@ -683,7 +685,7 @@ void InterpCompiler::EmitCode() } } - m_MethodCodeSize = (int32_t)(ip - m_pMethodCode); + m_methodCodeSize = (int32_t)(ip - m_pMethodCode); PatchRelocations(&relocs); } @@ -703,29 +705,53 @@ InterpMethod* InterpCompiler::CreateInterpMethod() int32_t* InterpCompiler::GetCode(int32_t *pCodeSize) { - *pCodeSize = m_MethodCodeSize; + *pCodeSize = m_methodCodeSize; return m_pMethodCode; } InterpCompiler::InterpCompiler(COMP_HANDLE compHnd, - CORINFO_METHOD_INFO* methodInfo) + CORINFO_METHOD_INFO* methodInfo, + bool verbose) { m_methodHnd = methodInfo->ftn; m_compScopeHnd = methodInfo->scope; m_compHnd = compHnd; m_methodInfo = methodInfo; + m_verbose = verbose; } InterpMethod* InterpCompiler::CompileMethod() { + if (m_verbose) + { + printf("Interpreter compile method "); + PrintMethodName(m_methodHnd); + printf("\n"); + } + CreateILVars(); GenerateCode(m_methodInfo); + if (m_verbose) + { + printf("\nUnoptimized IR:\n"); + PrintCode(); + } + AllocOffsets(); EmitCode(); + if (m_verbose) + { + printf("\nCompiled method: "); + PrintMethodName(m_methodHnd); + printf("\nLocals size %d\n", m_totalVarsStackSize); + PrintCompiledCode(); + printf("\n"); + } + return CreateInterpMethod(); } @@ -828,6 +854,9 @@ void InterpCompiler::CreateILVars() offset = 0; + if (m_verbose) + printf("\nCreate IL Vars:\n"); + CORINFO_ARG_LIST_HANDLE sigArg = m_methodInfo->args.args; for (int i = 0; i < numArgs; i++) { InterpType interpType; @@ -857,6 +886,8 @@ void InterpCompiler::CreateILVars() m_pVars[i].size = size; offset = ALIGN_UP_TO(offset, align); m_pVars[i].offset = offset; + if (m_verbose) + printf("alloc arg var %d to offset %d\n", i, offset); offset += size; } @@ -881,6 +912,8 @@ void 
InterpCompiler::CreateILVars() m_pVars[index].size = size; offset = ALIGN_UP_TO(offset, align); m_pVars[index].offset = offset; + if (m_verbose) + printf("alloc local var %d to offset %d\n", index, offset); offset += size; sigArg = m_compHnd->getArgNext(sigArg); } @@ -1423,9 +1456,13 @@ int InterpCompiler::GenerateCode(CORINFO_METHOD_INFO* methodInfo) goto exit_bad_code; int32_t insOffset = (int32_t)(m_ip - m_pILCode); + m_currentILOffset = insOffset; + InterpBasicBlock *pNewBB = m_ppOffsetToBB[insOffset]; if (pNewBB != NULL && m_pCBB != pNewBB) { + if (m_verbose) + printf("BB%d (IL_%04x):\n", pNewBB->index, pNewBB->ilOffset); // If we were emitting into previous bblock, we are finished now if (m_pCBB->emitState == BBStateEmitting) m_pCBB->emitState = BBStateEmitted; @@ -1492,6 +1529,8 @@ int InterpCompiler::GenerateCode(CORINFO_METHOD_INFO* methodInfo) } else { + if (m_verbose) + printf("BB%d without initialized stack\n", pNewBB->index); assert(pNewBB->emitState == BBStateNotEmitted); needsRetryEmit = true; // linking to its next bblock, if its the case, will only happen @@ -1516,6 +1555,20 @@ int InterpCompiler::GenerateCode(CORINFO_METHOD_INFO* methodInfo) m_ppOffsetToBB[insOffset] = m_pCBB; + if (m_verbose) + { + const uint8_t *ip = m_ip; + printf("IL_%04x %-10s, sp %d, %s", + (int32_t)(m_ip - m_pILCode), + CEEOpName(CEEDecodeOpcode(&ip)), (int32_t)(m_pStackPointer - m_pStackBase), + m_pStackPointer > m_pStackBase ? 
g_stackTypeString[m_pStackPointer[-1].type] : " "); + if (m_pStackPointer > m_pStackBase && + (m_pStackPointer[-1].type == StackTypeO || m_pStackPointer[-1].type == StackTypeVT) && + m_pStackPointer[-1].clsHnd != NULL) + PrintClassName(m_pStackPointer[-1].clsHnd); + printf("\n"); + } + uint8_t opcode = *m_ip; switch (opcode) { @@ -2208,6 +2261,8 @@ int InterpCompiler::GenerateCode(CORINFO_METHOD_INFO* methodInfo) linkBBlocks = false; needsRetryEmit = false; + if (m_verbose) + printf("retry emit\n"); goto retry_emit; } @@ -2238,3 +2293,168 @@ void InterpCompiler::UnlinkUnreachableBBlocks() } } } + +void InterpCompiler::PrintClassName(CORINFO_CLASS_HANDLE cls) +{ + char className[100]; + m_compHnd->printClassName(cls, className, 100); + printf("%s", className); +} + +void InterpCompiler::PrintMethodName(CORINFO_METHOD_HANDLE method) +{ + char methodName[100]; + + CORINFO_CLASS_HANDLE cls = m_compHnd->getMethodClass(method); + PrintClassName(cls); + + m_compHnd->printMethodName(method, methodName, 100); + printf(".%s", methodName); +} + +void InterpCompiler::PrintCode() +{ + for (InterpBasicBlock *pBB = m_pEntryBB; pBB != NULL; pBB = pBB->pNextBB) + PrintBBCode(pBB); +} + +void InterpCompiler::PrintBBCode(InterpBasicBlock *pBB) +{ + printf("BB%d:\n", pBB->index); + for (InterpInst *ins = pBB->pFirstIns; ins != NULL; ins = ins->pNext) + PrintIns(ins); +} + +void InterpCompiler::PrintIns(InterpInst *ins) +{ + int32_t opcode = ins->opcode; + if (ins->ilOffset == -1) + printf("IL_----: %-14s", InterpOpName(opcode)); + else + printf("IL_%04x: %-14s", ins->ilOffset, InterpOpName(opcode)); + + if (g_interpOpDVars[opcode] > 0) + printf(" [%d <-", ins->dVar); + else + printf(" [nil <-"); + + if (g_interpOpSVars[opcode] > 0) + { + for (int i = 0; i < g_interpOpSVars[opcode]; i++) + { + if (ins->sVars[i] == CALL_ARGS_SVAR) + { + printf(" c:"); + if (ins->info.pCallInfo && ins->info.pCallInfo->pCallArgs) + { + int *callArgs = ins->info.pCallInfo->pCallArgs; + while (*callArgs != 
-1) + { + printf(" %d", *callArgs); + callArgs++; + } + } + } + else + { + printf(" %d", ins->sVars[i]); + } + } + printf("],"); + } + else + { + printf(" nil],"); + } + + // LDLOCA has special semantics, it has data in sVars[0], but it doesn't have any sVars + if (opcode == INTOP_LDLOCA) + printf(" %d", ins->sVars[0]); + else + PrintInsData(ins, ins->ilOffset, &ins->data[0], ins->opcode); + printf("\n"); +} + +void InterpCompiler::PrintInsData(InterpInst *ins, int32_t insOffset, const int32_t *pData, int32_t opcode) +{ + switch (g_interpOpArgType[opcode]) { + case InterpOpNoArgs: + break; + case InterpOpInt: + printf(" %d", *pData); + break; + case InterpOpBranch: + if (ins) + printf(" BB%d", ins->info.pTargetBB->index); + else + printf(" IR_%04x", insOffset + *pData); + break; + case InterpOpSwitch: + { + int32_t n = *pData; + printf(" ("); + for (int i = 0; i < n; i++) + { + if (i > 0) + printf(", "); + + if (ins) + printf("BB%d", ins->info.ppTargetBBTable[i]->index); + else + printf("IR_%04x", insOffset + 3 + i + *(pData + 1 + i)); + } + printf(")"); + break; + } + case InterpOpMethodToken: + { + CORINFO_METHOD_HANDLE mh = (CORINFO_METHOD_HANDLE)((size_t)m_dataItems.Get(*pData) & ~INTERP_METHOD_DESC_TAG); + printf(" "); + PrintMethodName(mh); + break; + } + default: + assert(0); + break; + } +} + +void InterpCompiler::PrintCompiledCode() +{ + const int32_t *ip = m_pMethodCode; + const int32_t *end = m_pMethodCode + m_methodCodeSize; + + while (ip < end) + { + PrintCompiledIns(ip, m_pMethodCode); + ip = InterpNextOp(ip); + } +} + +void InterpCompiler::PrintCompiledIns(const int32_t *ip, const int32_t *start) +{ + int32_t opcode = *ip; + int32_t insOffset = (int32_t)(ip - start); + + printf("IR_%04x: %-14s", insOffset, InterpOpName(opcode)); + ip++; + + if (g_interpOpDVars[opcode] > 0) + printf(" [%d <-", *ip++); + else + printf(" [nil <-"); + + if (g_interpOpSVars[opcode] > 0) + { + for (int i = 0; i < g_interpOpSVars[opcode]; i++) + printf(" %d", *ip++); + 
printf("],"); + } + else + { + printf(" nil],"); + } + + PrintInsData(NULL, insOffset, ip, opcode); + printf("\n"); +} diff --git a/src/coreclr/interpreter/compiler.h b/src/coreclr/interpreter/compiler.h index 61ef7b1d083351..ff37cb38aee1b5 100644 --- a/src/coreclr/interpreter/compiler.h +++ b/src/coreclr/interpreter/compiler.h @@ -211,12 +211,14 @@ class InterpCompiler CORINFO_MODULE_HANDLE m_compScopeHnd; COMP_HANDLE m_compHnd; CORINFO_METHOD_INFO* m_methodInfo; + bool m_verbose; static int32_t InterpGetMovForType(InterpType interpType, bool signExtend); uint8_t* m_ip; uint8_t* m_pILCode; int32_t m_ILCodeSize; + int32_t m_currentILOffset; // FIXME during compilation this should be a hashtable for fast lookup of duplicates TArray m_dataItems; @@ -323,7 +325,7 @@ class InterpCompiler // Passes int32_t* m_pMethodCode; - int32_t m_MethodCodeSize; // in int32_t + int32_t m_methodCodeSize; // in int32_t void AllocOffsets(); int32_t ComputeCodeSize(); @@ -332,9 +334,19 @@ class InterpCompiler void PatchRelocations(TArray *relocs); InterpMethod* CreateInterpMethod(); bool CreateBasicBlocks(CORINFO_METHOD_INFO* methodInfo); + + // Debug + void PrintClassName(CORINFO_CLASS_HANDLE cls); + void PrintMethodName(CORINFO_METHOD_HANDLE method); + void PrintCode(); + void PrintBBCode(InterpBasicBlock *pBB); + void PrintIns(InterpInst *ins); + void PrintInsData(InterpInst *ins, int32_t offset, const int32_t *pData, int32_t opcode); + void PrintCompiledCode(); + void PrintCompiledIns(const int32_t *ip, const int32_t *start); public: - InterpCompiler(COMP_HANDLE compHnd, CORINFO_METHOD_INFO* methodInfo); + InterpCompiler(COMP_HANDLE compHnd, CORINFO_METHOD_INFO* methodInfo, bool verbose); InterpMethod* CompileMethod(); diff --git a/src/coreclr/interpreter/compileropt.cpp b/src/coreclr/interpreter/compileropt.cpp index c1b7a35f3f6b63..cdfbd04cabb577 100644 --- a/src/coreclr/interpreter/compileropt.cpp +++ b/src/coreclr/interpreter/compileropt.cpp @@ -54,6 +54,8 @@ void 
InterpCompiler::InitializeGlobalVar(int32_t var, int bbIndex) { AllocGlobalVarOffset(var); m_pVars[var].global = true; + if (m_verbose) + printf("alloc global var %d to offset %d\n", var, m_pVars[var].offset); } } @@ -82,6 +84,8 @@ void InterpCompiler::InitializeGlobalVars() { AllocGlobalVarOffset(var); m_pVars[var].global = true; + if (m_verbose) + printf("alloc global var %d to offset %d\n", var, m_pVars[var].offset); } } ForEachInsVar(pIns, (void*)(size_t)pBB->index, &InterpCompiler::InitializeGlobalVarCB); @@ -215,6 +219,9 @@ void InterpCompiler::AllocOffsets() InitializeGlobalVars(); + if (m_verbose) + printf("\nAllocating var offsets\n"); + int finalVarsStackSize = m_totalVarsStackSize; // We now have the top of stack offset. All local regs are allocated after this offset, with each basic block @@ -223,6 +230,9 @@ void InterpCompiler::AllocOffsets() InterpInst *pIns; int insIndex = 0; + if (m_verbose) + printf("BB%d\n", pBB->index); + // All data structs should be left empty after a bblock iteration assert(m_pActiveVars->GetSize() == 0); assert(m_pActiveCalls->GetSize() == 0); @@ -232,6 +242,7 @@ void InterpCompiler::AllocOffsets() { if (pIns->opcode == INTOP_NOP) continue; + // TODO NewObj will be marked as noCallArgs if (pIns->flags & INTERP_INST_FLAG_CALL) { @@ -289,6 +300,12 @@ void InterpCompiler::AllocOffsets() if (opcode == INTOP_NOP) continue; + if (m_verbose) + { + printf("\tins_index %d\t", insIndex); + PrintIns(pIns); + } + // Expire source vars. 
We first mark them as not alive and then compact the array for (int i = 0; i < g_interpOpSVars[opcode]; i++) { @@ -327,6 +344,8 @@ void InterpCompiler::AllocOffsets() else if (!m_pVars[var].global && m_pVars[var].offset == -1) { AllocVarOffset(var, &currentOffset); + if (m_verbose) + printf("alloc var %d to offset %d\n", var, m_pVars[var].offset); if (currentOffset > finalVarsStackSize) finalVarsStackSize = currentOffset; @@ -344,6 +363,18 @@ void InterpCompiler::AllocOffsets() } } } + + if (m_verbose) + { + printf("active vars:"); + for (int i = 0; i < m_pActiveVars->GetSize(); i++) + { + int32_t var = m_pActiveVars->Get(i); + if (m_pVars[var].alive) + printf(" %d (end %d),", var, m_pVars[var].liveEnd); + } + printf("\n"); + } insIndex++; } } diff --git a/src/coreclr/interpreter/eeinterp.cpp b/src/coreclr/interpreter/eeinterp.cpp index 9f154ba9ba5b97..0f9c02b448f1a0 100644 --- a/src/coreclr/interpreter/eeinterp.cpp +++ b/src/coreclr/interpreter/eeinterp.cpp @@ -78,7 +78,7 @@ CorJitResult CILInterp::compileMethod(ICorJitInfo* compHnd, return CORJIT_SKIPPED; } - InterpCompiler compiler(compHnd, methodInfo); + InterpCompiler compiler(compHnd, methodInfo, false); InterpMethod *pMethod = compiler.CompileMethod(); int32_t IRCodeSize; diff --git a/src/coreclr/interpreter/intops.cpp b/src/coreclr/interpreter/intops.cpp index dc31e8f3af2de0..94f259d14d10bc 100644 --- a/src/coreclr/interpreter/intops.cpp +++ b/src/coreclr/interpreter/intops.cpp @@ -50,9 +50,15 @@ const InterpOpArgType g_interpOpArgType[] = { #undef OPDEF }; -const uint8_t* InterpNextOp(const uint8_t *ip) +const int32_t* InterpNextOp(const int32_t *ip) { int len = g_interpOpLen[*ip]; + if (len == 0) + { + assert(*ip == INTOP_SWITCH); + len = 3 + ip[2]; + } + return ip + len; } diff --git a/src/coreclr/interpreter/intops.h b/src/coreclr/interpreter/intops.h index 2c37cd6f3b62aa..d61fed04fba517 100644 --- a/src/coreclr/interpreter/intops.h +++ b/src/coreclr/interpreter/intops.h @@ -22,7 +22,7 @@ extern const uint8_t 
g_interpOpLen[]; extern const int g_interpOpDVars[]; extern const int g_interpOpSVars[]; extern const InterpOpArgType g_interpOpArgType[]; -extern const uint8_t* InterpNextOp(const uint8_t* ip); +extern const int32_t* InterpNextOp(const int32_t* ip); // This, instead of an array of pointers, to optimize away a pointer and a relocation per string. extern const uint32_t g_interpOpNameOffsets[]; From a022dedafd069bf47f91e65d8d4da4d7ecddfa67 Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Thu, 6 Mar 2025 20:04:37 +0200 Subject: [PATCH 13/16] Add a temporary intrinsic for Environment.FailFast Because we currently don't have support for interoping between interpreter and jit, in order to signal test failure in interpreter test we will just crash instead. --- src/coreclr/interpreter/compiler.cpp | 37 ++++++++++++++++++++++++++++ src/coreclr/interpreter/compiler.h | 1 + src/coreclr/interpreter/intops.def | 4 +++ src/coreclr/interpreter/intops.h | 2 ++ src/coreclr/vm/interpexec.cpp | 11 +++++++++ 5 files changed, 55 insertions(+) diff --git a/src/coreclr/interpreter/compiler.cpp b/src/coreclr/interpreter/compiler.cpp index 8e27f443f7ad05..3c283d3f84ea4e 100644 --- a/src/coreclr/interpreter/compiler.cpp +++ b/src/coreclr/interpreter/compiler.cpp @@ -1352,6 +1352,31 @@ int32_t InterpCompiler::GetMethodDataItemIndex(CORINFO_METHOD_HANDLE mHandle) return GetDataItemIndex((void*)data); } +bool InterpCompiler::EmitCallIntrinsics(CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO sig) +{ + const char *className = NULL; + const char *namespaceName = NULL; + const char *methodName = m_compHnd->getMethodNameFromMetadata(method, &className, &namespaceName, NULL, 0); + int32_t opcode = -1; + + if (namespaceName && !strcmp(namespaceName, "System")) + { + if (className && !strcmp(className, "Environment")) + { + if (methodName && !strcmp(methodName, "FailFast")) + opcode = INTOP_FAILFAST; // to be removed, not really an intrinsic + } + } + + if (opcode != -1) + { + AddIns(opcode); + return
true; + } + + return false; +} + void InterpCompiler::EmitCall(CORINFO_CLASS_HANDLE constrainedClass, bool readonly, bool tailcall) { uint32_t token = getU4LittleEndian(m_ip + 1); @@ -1368,6 +1393,12 @@ void InterpCompiler::EmitCall(CORINFO_CLASS_HANDLE constrainedClass, bool readon CORINFO_SIG_INFO targetSignature; m_compHnd->getMethodSig(targetMethod, &targetSignature); + if (EmitCallIntrinsics(targetMethod, targetSignature)) + { + m_ip += 5; + return; + } + // Process sVars int numArgs = targetSignature.numArgs + targetSignature.hasThis(); m_pStackPointer -= numArgs; @@ -1598,6 +1629,12 @@ int InterpCompiler::GenerateCode(CORINFO_METHOD_INFO* methodInfo) m_pLastIns->SetDVar(m_pStackPointer[-1].var); m_ip += 2; break; + case CEE_LDNULL: + AddIns(INTOP_LDNULL); + PushStackType(StackTypeO, NULL); + m_pLastIns->SetDVar(m_pStackPointer[-1].var); + m_ip++; + break; case CEE_LDARG_S: EmitLoadVar(m_ip[1]); diff --git a/src/coreclr/interpreter/compiler.h b/src/coreclr/interpreter/compiler.h index ff37cb38aee1b5..86447e0e1b637d 100644 --- a/src/coreclr/interpreter/compiler.h +++ b/src/coreclr/interpreter/compiler.h @@ -308,6 +308,7 @@ class InterpCompiler void EmitShiftOp(int32_t opBase); void EmitCompareOp(int32_t opBase); void EmitCall(CORINFO_CLASS_HANDLE constrainedClass, bool readonly, bool tailcall); + bool EmitCallIntrinsics(CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO sig); // Var Offset allocator TArray *m_pActiveCalls; diff --git a/src/coreclr/interpreter/intops.def b/src/coreclr/interpreter/intops.def index 8f934ff50e24c8..6cc255973dbc88 100644 --- a/src/coreclr/interpreter/intops.def +++ b/src/coreclr/interpreter/intops.def @@ -11,6 +11,8 @@ OPDEF(INTOP_RET, "ret", 2, 0, 1, InterpOpNoArgs) OPDEF(INTOP_RET_VOID, "ret.void", 1, 0, 0, InterpOpNoArgs) OPDEF(INTOP_LDC_I4, "ldc.i4", 3, 1, 0, InterpOpInt) +OPDEF(INTOP_LDC_I4_0, "ldc.i4.0", 2, 1, 0, InterpOpNoArgs) +OPDEF(INTOP_LDC_I8_0, "ldc.i8.0", 2, 1, 0, InterpOpNoArgs) OPDEF(INTOP_MOV_I4_I1, "mov.i4.i1", 3, 1, 
1, InterpOpNoArgs) OPDEF(INTOP_MOV_I4_U1, "mov.i4.u1", 3, 1, 1, InterpOpNoArgs) @@ -190,3 +192,5 @@ OPDEF(INTOP_CLT_UN_R8, "clt.un.r8", 4, 1, 2, InterpOpNoArgs) // Calls OPDEF(INTOP_CALL, "call", 4, 1, 1, InterpOpMethodToken) + +OPDEF(INTOP_FAILFAST, "failfast", 1, 0, 0, InterpOpNoArgs) diff --git a/src/coreclr/interpreter/intops.h b/src/coreclr/interpreter/intops.h index d61fed04fba517..7d0104df22bbb8 100644 --- a/src/coreclr/interpreter/intops.h +++ b/src/coreclr/interpreter/intops.h @@ -38,8 +38,10 @@ int CEEOpcodeSize(const uint8_t *ip, const uint8_t *codeEnd); #ifdef TARGET_64BIT #define INTOP_MOV_P INTOP_MOV_8 +#define INTOP_LDNULL INTOP_LDC_I8_0 #else #define INTOP_MOV_P INTOP_MOV_4 +#define INTOP_LDNULL INTOP_LDC_I4_0 #endif static inline bool InterpOpIsUncondBranch(int32_t opcode) diff --git a/src/coreclr/vm/interpexec.cpp b/src/coreclr/vm/interpexec.cpp index 271ade7a8bf843..80e8c25802a138 100644 --- a/src/coreclr/vm/interpexec.cpp +++ b/src/coreclr/vm/interpexec.cpp @@ -52,6 +52,14 @@ void InterpExecMethod(InterpMethodContextFrame *pFrame, InterpThreadContext *pTh LOCAL_VAR(ip[1], int32_t) = ip[2]; ip += 3; break; + case INTOP_LDC_I4_0: + LOCAL_VAR(ip[1], int32_t) = 0; + ip += 2; + break; + case INTOP_LDC_I8_0: + LOCAL_VAR(ip[1], int64_t) = 0; + ip += 2; + break; case INTOP_RET: // Return stack slot sized value *(int64_t*)pFrame->pRetVal = LOCAL_VAR(ip[1], int64_t); @@ -726,6 +734,9 @@ void InterpExecMethod(InterpMethodContextFrame *pFrame, InterpThreadContext *pTh pThreadContext->pStackPointer = stack + pMethod->allocaSize; break; } + case INTOP_FAILFAST: + assert(0); + break; default: assert(0); break; From 0dc9815dcbe08124a74e8d728621374e1f57fdb7 Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Fri, 7 Mar 2025 18:42:29 +0200 Subject: [PATCH 14/16] Add a few sanity tests to ensure new interp functionality mostly works Minor fixes to ensure tests work. Add support for returns of any type. Add opcode for loading full I4. 
--- src/coreclr/interpreter/compiler.cpp | 27 +++++++++--- src/coreclr/interpreter/intops.def | 1 + src/coreclr/vm/interpexec.cpp | 3 ++ src/tests/JIT/interpreter/Interpreter.cs | 56 +++++++++++++++++++++++- 4 files changed, 80 insertions(+), 7 deletions(-) diff --git a/src/coreclr/interpreter/compiler.cpp b/src/coreclr/interpreter/compiler.cpp index 3c283d3f84ea4e..8838dcabf5538c 100644 --- a/src/coreclr/interpreter/compiler.cpp +++ b/src/coreclr/interpreter/compiler.cpp @@ -808,6 +808,8 @@ static InterpType GetInterpType(CorInfoType corInfoType) case CORINFO_TYPE_VALUECLASS: case CORINFO_TYPE_REFANY: return InterpTypeVT; + case CORINFO_TYPE_VOID: + return InterpTypeVoid; default: assert(0); break; @@ -1629,6 +1631,13 @@ int InterpCompiler::GenerateCode(CORINFO_METHOD_INFO* methodInfo) m_pLastIns->SetDVar(m_pStackPointer[-1].var); m_ip += 2; break; + case CEE_LDC_I4: + AddIns(INTOP_LDC_I4); + m_pLastIns->data[0] = getI4LittleEndian(m_ip + 1); + PushStackType(StackTypeI4, NULL); + m_pLastIns->SetDVar(m_pStackPointer[-1].var); + m_ip += 5; + break; case CEE_LDNULL: AddIns(INTOP_LDNULL); PushStackType(StackTypeO, NULL); @@ -1677,21 +1686,27 @@ int InterpCompiler::GenerateCode(CORINFO_METHOD_INFO* methodInfo) case CEE_RET: { CORINFO_SIG_INFO sig = methodInfo->args; - if (sig.retType == CORINFO_TYPE_VOID) + InterpType retType = GetInterpType(sig.retType); + + if (retType == InterpTypeVoid) { AddIns(INTOP_RET_VOID); } - else if (sig.retType == CORINFO_TYPE_INT) + else if (retType == InterpTypeVT) { CHECK_STACK(1); - AddIns(INTOP_RET); + AddIns(INTOP_RET_VT); m_pStackPointer--; - m_pLastIns->SetSVar(m_pStackPointer[0].var); + int32_t retVar = m_pStackPointer[0].var; + m_pLastIns->SetSVar(retVar); + m_pLastIns->data[0] = m_pVars[retVar].size; } else { - // FIXME - assert(0); + CHECK_STACK(1); + AddIns(INTOP_RET); + m_pStackPointer--; + m_pLastIns->SetSVar(m_pStackPointer[0].var); } m_ip++; break; diff --git a/src/coreclr/interpreter/intops.def 
b/src/coreclr/interpreter/intops.def index 6cc255973dbc88..fcdf33aa11575c 100644 --- a/src/coreclr/interpreter/intops.def +++ b/src/coreclr/interpreter/intops.def @@ -8,6 +8,7 @@ OPDEF(INTOP_NOP, "nop", 1, 0, 0, InterpOpNoArgs) OPDEF(INTOP_RET, "ret", 2, 0, 1, InterpOpNoArgs) +OPDEF(INTOP_RET_VT, "ret.vt", 3, 0, 1, InterpOpInt) OPDEF(INTOP_RET_VOID, "ret.void", 1, 0, 0, InterpOpNoArgs) OPDEF(INTOP_LDC_I4, "ldc.i4", 3, 1, 0, InterpOpInt) diff --git a/src/coreclr/vm/interpexec.cpp b/src/coreclr/vm/interpexec.cpp index 80e8c25802a138..75b4dbb66a3132 100644 --- a/src/coreclr/vm/interpexec.cpp +++ b/src/coreclr/vm/interpexec.cpp @@ -64,6 +64,9 @@ void InterpExecMethod(InterpMethodContextFrame *pFrame, InterpThreadContext *pTh // Return stack slot sized value *(int64_t*)pFrame->pRetVal = LOCAL_VAR(ip[1], int64_t); goto EXIT_FRAME; + case INTOP_RET_VT: + memmove(pFrame->pRetVal, stack + ip[1], ip[2]); + goto EXIT_FRAME; case INTOP_RET_VOID: goto EXIT_FRAME; diff --git a/src/tests/JIT/interpreter/Interpreter.cs b/src/tests/JIT/interpreter/Interpreter.cs index 0dc20964fc6a50..d26415da5a4d12 100644 --- a/src/tests/JIT/interpreter/Interpreter.cs +++ b/src/tests/JIT/interpreter/Interpreter.cs @@ -15,7 +15,61 @@ static int Main(string[] args) [MethodImpl(MethodImplOptions.NoInlining)] public static void RunInterpreterTests() { -// Console.WriteLine("Run interp tests"); +// Console.WriteLine("Run interp tests"); + if (SumN(50) != 1275) + Environment.FailFast(null); + if (Mul4(53, 24, 13, 131) != 2166216) + Environment.FailFast(null); + + TestSwitch(); + + if (!PowLoop(20, 10, 1661992960)) + Environment.FailFast(null); + } + + public static int Mul4(int a, int b, int c, int d) + { + return a * b * c * d; } + public static long SumN(int n) + { + if (n == 1) + return 1; + return (long)SumN(n - 1) + n; + } + + public static int SwitchOp(int a, int b, int op) + { + switch (op) + { + case 0: + return a + b; + case 1: + return a - b; + case 2: + return a * b; + default: + return 42; + 
} + } + + public static void TestSwitch() + { + int n0 = SwitchOp (20, 6, 0); // 26 + int n1 = SwitchOp (20, 6, 1); // 14 + int n2 = SwitchOp (20, 6, 2); // 120 + int n3 = SwitchOp (20, 6, 3); // 42 + + if ((n0 + n1 + n2 + n3) != 202) + Environment.FailFast(null); + } + + public static bool PowLoop(int n, long nr, int expected) + { + long ret = 1; + for (int i = 0; i < n; i++) + ret *= nr; + return (int)ret == expected; + } } From cef64027f8704383a6afe0190b016618535b5637 Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Mon, 17 Mar 2025 20:43:42 +0200 Subject: [PATCH 15/16] Address PR feedback --- src/coreclr/interpreter/compiler.cpp | 137 +++++++++++------------- src/coreclr/interpreter/compiler.h | 70 +++++++++++- src/coreclr/interpreter/compileropt.cpp | 29 ++--- src/coreclr/interpreter/eeinterp.cpp | 2 +- src/coreclr/interpreter/intops.h | 17 ++- src/coreclr/vm/interpexec.cpp | 7 +- src/coreclr/vm/interpexec.h | 2 +- 7 files changed, 156 insertions(+), 108 deletions(-) diff --git a/src/coreclr/interpreter/compiler.cpp b/src/coreclr/interpreter/compiler.cpp index 8838dcabf5538c..a301e5d5119d39 100644 --- a/src/coreclr/interpreter/compiler.cpp +++ b/src/coreclr/interpreter/compiler.cpp @@ -14,7 +14,7 @@ static const StackType g_stackTypeFromInterpType[] = StackTypeR8, // R8 StackTypeO, // O StackTypeVT, // VT - StackTypeMP, // ByRef + StackTypeByRef, // ByRef }; static const InterpType g_interpTypeFromStackType[] = @@ -208,7 +208,7 @@ void InterpCompiler::ForEachInsSVar(InterpInst *ins, void *pData, void (InterpCo { if (ins->info.pCallInfo && ins->info.pCallInfo->pCallArgs) { int *callArgs = ins->info.pCallInfo->pCallArgs; - while (*callArgs != -1) + while (*callArgs != CALL_ARGS_TERMINATOR) { (this->*callback) (callArgs, pData); callArgs++; @@ -232,14 +232,12 @@ void InterpCompiler::ForEachInsVar(InterpInst *ins, void *pData, void (InterpCom } -InterpBasicBlock* InterpCompiler::AllocBB() +InterpBasicBlock* InterpCompiler::AllocBB(int32_t ilOffset) { 
InterpBasicBlock *bb = (InterpBasicBlock*)AllocMemPool(sizeof(InterpBasicBlock)); - memset(bb, 0, sizeof(InterpBasicBlock)); - bb->ilOffset = -1; - bb->nativeOffset = -1; - bb->stackHeight = -1; - bb->index = m_BBCount++; + + new (bb) InterpBasicBlock (m_BBCount, ilOffset); + m_BBCount++; return bb; } @@ -249,9 +247,8 @@ InterpBasicBlock* InterpCompiler::GetBB(int32_t ilOffset) if (!bb) { - bb = AllocBB (); + bb = AllocBB(ilOffset); - bb->ilOffset = ilOffset; m_ppOffsetToBB[ilOffset] = bb; } @@ -361,6 +358,7 @@ void InterpCompiler::UnlinkBBs(InterpBasicBlock *from, InterpBasicBlock *to) to->inCount--; } +// These are moves between vars, operating only on the interpreter stack int32_t InterpCompiler::InterpGetMovForType(InterpType interpType, bool signExtend) { switch (interpType) @@ -465,14 +463,8 @@ int32_t InterpCompiler::CreateVarExplicit(InterpType interpType, CORINFO_CLASS_H m_pVars = (InterpVar*) ReallocTemporary(m_pVars, m_varsCapacity * sizeof(InterpVar)); } InterpVar *var = &m_pVars[m_varsSize]; - memset(var, 0, sizeof(InterpVar)); - var->interpType = interpType; - var->clsHnd = clsHnd; - var->size = size; - var->offset = -1; - var->liveStart = -1; - var->bbIndex = -1; + new (var) InterpVar(interpType, clsHnd, size); m_varsSize++; return m_varsSize - 1; @@ -512,7 +504,7 @@ void InterpCompiler::EnsureStack(int additional) do { \ m_hasInvalidCode = true; \ return; \ - } while (0); + } while (0) bool InterpCompiler::CheckStackHelper(int n) { @@ -583,9 +575,7 @@ int32_t* InterpCompiler::EmitCodeIns(int32_t *ip, InterpInst *ins, TArraytype = RelocSwitch; - reloc->offset = (int32_t)(ip - m_pMethodCode); - reloc->pTargetBB = ins->info.ppTargetBBTable [i]; + new (reloc) Reloc(RelocSwitch, (int32_t)(ip - m_pMethodCode), ins->info.ppTargetBBTable[i], 0); relocs->Add(reloc); *ip++ = (int32_t)0xdeadbeef; } @@ -609,16 +599,17 @@ int32_t* InterpCompiler::EmitCodeIns(int32_t *ip, InterpInst *ins, TArraytype = RelocLongBranch; - reloc->skip = g_interpOpSVars[opcode]; - 
reloc->offset = brBaseOffset; - reloc->pTargetBB = ins->info.pTargetBB; + new (reloc) Reloc(RelocLongBranch, brBaseOffset, ins->info.pTargetBB, g_interpOpSVars[opcode]); relocs->Add(reloc); *ip++ = (int32_t)0xdeadbeef; } } else { + // Default code emit for an instruction. The opcode was already emitted above. + // We emit the offset for the instruction destination, then for every single source + // variable we emit another offset. Finally, we will emit any additional data needed + // by the instruction. if (g_interpOpDVars[opcode]) *ip++ = m_pVars[ins->dVar].offset; @@ -722,27 +713,32 @@ InterpCompiler::InterpCompiler(COMP_HANDLE compHnd, InterpMethod* InterpCompiler::CompileMethod() { +#ifdef DEBUG if (m_verbose) { printf("Interpreter compile method "); PrintMethodName(m_methodHnd); printf("\n"); } +#endif CreateILVars(); GenerateCode(m_methodInfo); +#ifdef DEBUG if (m_verbose) { printf("\nUnoptimized IR:\n"); PrintCode(); } +#endif AllocOffsets(); EmitCode(); +#ifdef DEBUG if (m_verbose) { printf("\nCompiled method: "); @@ -751,6 +747,7 @@ InterpMethod* InterpCompiler::CompileMethod() PrintCompiledCode(); printf("\n"); } +#endif return CreateInterpMethod(); } @@ -765,7 +762,7 @@ void InterpCompiler::EmitConv(StackInfo *sp, InterpInst *prevIns, StackType type newInst = AddIns(convOp); newInst->SetSVar(sp->var); - sp->Init(type); + new (sp) StackInfo(type); int32_t var = CreateVarExplicit(g_interpTypeFromStackType[type], NULL, INTERP_STACK_SLOT_SIZE); sp->var = var; newInst->SetDVar(var); @@ -817,7 +814,7 @@ static InterpType GetInterpType(CorInfoType corInfoType) return InterpTypeVoid; } -int32_t InterpCompiler::GetInterpTypeSize(CORINFO_CLASS_HANDLE clsHnd, InterpType interpType, int32_t *pAlign) +int32_t InterpCompiler::GetInterpTypeStackSize(CORINFO_CLASS_HANDLE clsHnd, InterpType interpType, int32_t *pAlign) { int32_t size, align; if (interpType == InterpTypeVT) @@ -856,8 +853,7 @@ void InterpCompiler::CreateILVars() offset = 0; - if (m_verbose) - 
printf("\nCreate IL Vars:\n"); + INTERP_DUMP("\nCreate IL Vars:\n"); CORINFO_ARG_LIST_HANDLE sigArg = m_methodInfo->args.args; for (int i = 0; i < numArgs; i++) { @@ -878,18 +874,16 @@ void InterpCompiler::CreateILVars() interpType = GetInterpType(argCorType); sigArg = m_compHnd->getArgNext(sigArg); } + size = GetInterpTypeStackSize(argClass, interpType, &align); + + new (&m_pVars[i]) InterpVar(interpType, argClass, size); - m_pVars[i].interpType = interpType; - m_pVars[i].clsHnd = argClass; m_pVars[i].global = true; m_pVars[i].ILGlobal = true; - - size = GetInterpTypeSize(argClass, interpType, &align); m_pVars[i].size = size; offset = ALIGN_UP_TO(offset, align); m_pVars[i].offset = offset; - if (m_verbose) - printf("alloc arg var %d to offset %d\n", i, offset); + INTERP_DUMP("alloc arg var %d to offset %d\n", i, offset); offset += size; } @@ -904,18 +898,15 @@ void InterpCompiler::CreateILVars() CorInfoType argCorType = strip(m_compHnd->getArgType(&m_methodInfo->locals, sigArg, &argClass)); interpType = GetInterpType(argCorType); + size = GetInterpTypeStackSize(argClass, interpType, &align); + + new (&m_pVars[index]) InterpVar(interpType, argClass, size); - m_pVars[index].interpType = interpType; - m_pVars[index].clsHnd = argClass; m_pVars[index].global = true; m_pVars[index].ILGlobal = true; - - size = GetInterpTypeSize(argClass, interpType, &align); - m_pVars[index].size = size; offset = ALIGN_UP_TO(offset, align); m_pVars[index].offset = offset; - if (m_verbose) - printf("alloc local var %d to offset %d\n", index, offset); + INTERP_DUMP("alloc local var %d to offset %d\n", index, offset); offset += size; sigArg = m_compHnd->getArgNext(sigArg); } @@ -1060,7 +1051,7 @@ void InterpCompiler::EmitBranch(InterpOpcode opcode, int32_t ilOffset) void InterpCompiler::EmitOneArgBranch(InterpOpcode opcode, int32_t ilOffset, int insSize) { CHECK_STACK_RET_VOID(1); - StackType argType = (m_pStackPointer[-1].type == StackTypeO || m_pStackPointer[-1].type == StackTypeMP) ? 
StackTypeI : m_pStackPointer[-1].type; + StackType argType = (m_pStackPointer[-1].type == StackTypeO || m_pStackPointer[-1].type == StackTypeByRef) ? StackTypeI : m_pStackPointer[-1].type; // offset the opcode to obtain the type specific I4/I8/R4/R8 variant. InterpOpcode opcodeArgType = (InterpOpcode)(opcode + argType - StackTypeI4); m_pStackPointer--; @@ -1078,8 +1069,8 @@ void InterpCompiler::EmitOneArgBranch(InterpOpcode opcode, int32_t ilOffset, int void InterpCompiler::EmitTwoArgBranch(InterpOpcode opcode, int32_t ilOffset, int insSize) { CHECK_STACK_RET_VOID(2); - StackType argType1 = (m_pStackPointer[-1].type == StackTypeO || m_pStackPointer[-1].type == StackTypeMP) ? StackTypeI : m_pStackPointer[-1].type; - StackType argType2 = (m_pStackPointer[-2].type == StackTypeO || m_pStackPointer[-2].type == StackTypeMP) ? StackTypeI : m_pStackPointer[-2].type; + StackType argType1 = (m_pStackPointer[-1].type == StackTypeO || m_pStackPointer[-1].type == StackTypeByRef) ? StackTypeI : m_pStackPointer[-1].type; + StackType argType2 = (m_pStackPointer[-2].type == StackTypeO || m_pStackPointer[-2].type == StackTypeByRef) ? 
StackTypeI : m_pStackPointer[-2].type; // Since branch opcodes only compare args of the same type, handle implicit conversions before // emitting the conditional branch @@ -1172,11 +1163,11 @@ void InterpCompiler::EmitBinaryArithmeticOp(int32_t opBase) StackType typeRes; - if (opBase == INTOP_ADD_I4 && (type1 == StackTypeMP || type2 == StackTypeMP)) + if (opBase == INTOP_ADD_I4 && (type1 == StackTypeByRef || type2 == StackTypeByRef)) { if (type1 == type2) INVALID_CODE_RET_VOID; - if (type1 == StackTypeMP) + if (type1 == StackTypeByRef) { if (type2 == StackTypeI4) { @@ -1184,11 +1175,11 @@ void InterpCompiler::EmitBinaryArithmeticOp(int32_t opBase) EmitConv(m_pStackPointer - 1, NULL, StackTypeI8, INTOP_CONV_I8_I4); type2 = StackTypeI8; #endif - typeRes = StackTypeMP; + typeRes = StackTypeByRef; } else if (type2 == StackTypeI) { - typeRes = StackTypeMP; + typeRes = StackTypeByRef; } else { @@ -1197,18 +1188,18 @@ void InterpCompiler::EmitBinaryArithmeticOp(int32_t opBase) } else { - // type2 == StackTypeMP + // type2 == StackTypeByRef if (type1 == StackTypeI4) { #ifdef TARGET_64BIT EmitConv(m_pStackPointer - 2, NULL, StackTypeI8, INTOP_CONV_I8_I4); type1 = StackTypeI8; #endif - typeRes = StackTypeMP; + typeRes = StackTypeByRef; } else if (type1 == StackTypeI) { - typeRes = StackTypeMP; + typeRes = StackTypeByRef; } else { @@ -1216,7 +1207,7 @@ void InterpCompiler::EmitBinaryArithmeticOp(int32_t opBase) } } } - else if (opBase == INTOP_SUB_I4 && type1 == StackTypeMP) + else if (opBase == INTOP_SUB_I4 && type1 == StackTypeByRef) { if (type2 == StackTypeI4) { @@ -1224,13 +1215,13 @@ void InterpCompiler::EmitBinaryArithmeticOp(int32_t opBase) EmitConv(m_pStackPointer - 1, NULL, StackTypeI8, INTOP_CONV_I8_I4); type2 = StackTypeI8; #endif - typeRes = StackTypeMP; + typeRes = StackTypeByRef; } else if (type2 == StackTypeI) { - typeRes = StackTypeMP; + typeRes = StackTypeByRef; } - else if (type2 == StackTypeMP) + else if (type2 == StackTypeByRef) { typeRes = StackTypeI; } 
@@ -1271,7 +1262,7 @@ void InterpCompiler::EmitBinaryArithmeticOp(int32_t opBase) // The argument opcode is for the base _I4 instruction. Depending on the type of the result // we compute the specific variant, _I4/_I8/_R4 or R8. - int32_t typeOffset = ((typeRes == StackTypeMP) ? StackTypeI : typeRes) - StackTypeI4; + int32_t typeOffset = ((typeRes == StackTypeByRef) ? StackTypeI : typeRes) - StackTypeI4; int32_t finalOpcode = opBase + typeOffset; m_pStackPointer -= 2; @@ -1287,7 +1278,7 @@ void InterpCompiler::EmitUnaryArithmeticOp(int32_t opBase) StackType stackType = m_pStackPointer[-1].type; int32_t finalOpcode = opBase + (stackType - StackTypeI4); - if (stackType == StackTypeMP || stackType == StackTypeO) + if (stackType == StackTypeByRef || stackType == StackTypeO) INVALID_CODE_RET_VOID; if (opBase == INTOP_NOT_I4 && (stackType != StackTypeI4 && stackType != StackTypeI8)) INVALID_CODE_RET_VOID; @@ -1321,7 +1312,7 @@ void InterpCompiler::EmitShiftOp(int32_t opBase) void InterpCompiler::EmitCompareOp(int32_t opBase) { CHECK_STACK_RET_VOID(2); - if (m_pStackPointer[-1].type == StackTypeO || m_pStackPointer[-1].type == StackTypeMP) + if (m_pStackPointer[-1].type == StackTypeO || m_pStackPointer[-1].type == StackTypeByRef) { AddIns(opBase + StackTypeI - StackTypeI4); } @@ -1464,8 +1455,7 @@ int InterpCompiler::GenerateCode(CORINFO_METHOD_INFO* methodInfo) m_stackCapacity = methodInfo->maxStack + 1; m_pStackBase = m_pStackPointer = (StackInfo*)AllocTemporary(sizeof(StackInfo) * m_stackCapacity); - m_pEntryBB = AllocBB(); - m_pEntryBB->ilOffset = 0; + m_pEntryBB = AllocBB(0); m_pEntryBB->emitState = BBStateEmitting; m_pEntryBB->stackHeight = 0; m_pCBB = m_pEntryBB; @@ -1494,8 +1484,7 @@ int InterpCompiler::GenerateCode(CORINFO_METHOD_INFO* methodInfo) InterpBasicBlock *pNewBB = m_ppOffsetToBB[insOffset]; if (pNewBB != NULL && m_pCBB != pNewBB) { - if (m_verbose) - printf("BB%d (IL_%04x):\n", pNewBB->index, pNewBB->ilOffset); + INTERP_DUMP("BB%d (IL_%04x):\n", 
pNewBB->index, pNewBB->ilOffset); // If we were emitting into previous bblock, we are finished now if (m_pCBB->emitState == BBStateEmitting) m_pCBB->emitState = BBStateEmitted; @@ -1562,8 +1551,7 @@ int InterpCompiler::GenerateCode(CORINFO_METHOD_INFO* methodInfo) } else { - if (m_verbose) - printf("BB%d without initialized stack\n", pNewBB->index); + INTERP_DUMP("BB%d without initialized stack\n", pNewBB->index); assert(pNewBB->emitState == BBStateNotEmitted); needsRetryEmit = true; // linking to its next bblock, if its the case, will only happen @@ -1588,6 +1576,7 @@ int InterpCompiler::GenerateCode(CORINFO_METHOD_INFO* methodInfo) m_ppOffsetToBB[insOffset] = m_pCBB; +#ifdef DEBUG if (m_verbose) { const uint8_t *ip = m_ip; @@ -1601,6 +1590,7 @@ int InterpCompiler::GenerateCode(CORINFO_METHOD_INFO* methodInfo) PrintClassName(m_pStackPointer[-1].clsHnd); printf("\n"); } +#endif uint8_t opcode = *m_ip; switch (opcode) @@ -1823,7 +1813,7 @@ int InterpCompiler::GenerateCode(CORINFO_METHOD_INFO* methodInfo) EmitConv(m_pStackPointer - 1, NULL, StackTypeI, INTOP_MOV_8); #endif break; - case StackTypeMP: + case StackTypeByRef: case StackTypeO: EmitConv(m_pStackPointer - 1, NULL, StackTypeI, INTOP_MOV_8); break; @@ -1856,7 +1846,7 @@ int InterpCompiler::GenerateCode(CORINFO_METHOD_INFO* methodInfo) #endif break; case StackTypeO: - case StackTypeMP: + case StackTypeByRef: EmitConv(m_pStackPointer - 1, NULL, StackTypeI, INTOP_MOV_8); break; case StackTypeI8: @@ -1884,7 +1874,7 @@ int InterpCompiler::GenerateCode(CORINFO_METHOD_INFO* methodInfo) case StackTypeI8: EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_MOV_8); break; - case StackTypeMP: + case StackTypeByRef: EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_MOV_P); break; default: @@ -1907,7 +1897,7 @@ int InterpCompiler::GenerateCode(CORINFO_METHOD_INFO* methodInfo) case StackTypeI8: EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_MOV_8); break; - case StackTypeMP: + case StackTypeByRef: 
EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_MOV_P); break; default: @@ -1931,7 +1921,7 @@ int InterpCompiler::GenerateCode(CORINFO_METHOD_INFO* methodInfo) } case StackTypeI8: break; - case StackTypeMP: + case StackTypeByRef: #ifdef TARGET_64BIT EmitConv(m_pStackPointer - 1, NULL, StackTypeI8, INTOP_MOV_8); #else @@ -1998,7 +1988,7 @@ int InterpCompiler::GenerateCode(CORINFO_METHOD_INFO* methodInfo) case StackTypeR8: EmitConv(m_pStackPointer - 1, NULL, StackTypeI8, INTOP_CONV_U8_R8); break; - case StackTypeMP: + case StackTypeByRef: #ifdef TARGET_64BIT EmitConv(m_pStackPointer - 1, NULL, StackTypeI8, INTOP_MOV_8); #else @@ -2313,8 +2303,7 @@ int InterpCompiler::GenerateCode(CORINFO_METHOD_INFO* methodInfo) linkBBlocks = false; needsRetryEmit = false; - if (m_verbose) - printf("retry emit\n"); + INTERP_DUMP("retry emit\n"); goto retry_emit; } @@ -2400,7 +2389,7 @@ void InterpCompiler::PrintIns(InterpInst *ins) if (ins->info.pCallInfo && ins->info.pCallInfo->pCallArgs) { int *callArgs = ins->info.pCallInfo->pCallArgs; - while (*callArgs != -1) + while (*callArgs != CALL_ARGS_TERMINATOR) { printf(" %d", *callArgs); callArgs++; diff --git a/src/coreclr/interpreter/compiler.h b/src/coreclr/interpreter/compiler.h index 86447e0e1b637d..1f93a9a308f255 100644 --- a/src/coreclr/interpreter/compiler.h +++ b/src/coreclr/interpreter/compiler.h @@ -16,7 +16,7 @@ enum StackType { StackTypeR8, StackTypeO, StackTypeVT, - StackTypeMP, + StackTypeByRef, StackTypeF, #ifdef TARGET_64BIT StackTypeI = StackTypeI8 @@ -47,6 +47,16 @@ enum InterpType { #endif }; +#ifdef DEBUG +#define INTERP_DUMP(...) \ + { \ + if (m_verbose) \ + printf(__VA_ARGS__); \ + } +#else +#define INTERP_DUMP(...) 
+#endif + struct InterpInst; struct InterpBasicBlock; @@ -115,6 +125,7 @@ struct InterpInst }; #define CALL_ARGS_SVAR -2 +#define CALL_ARGS_TERMINATOR -1 struct StackInfo; @@ -140,6 +151,24 @@ struct InterpBasicBlock InterpBasicBlock **ppOutBBs; InterpBBState emitState; + + InterpBasicBlock(int32_t index) : InterpBasicBlock(index, 0) { } + + InterpBasicBlock(int32_t index, int32_t ilOffset) + { + this->index = index; + this->ilOffset = ilOffset; + nativeOffset = -1; + stackHeight = -1; + + pFirstIns = pLastIns = NULL; + pNextBB = NULL; + + inCount = 0; + outCount = 0; + + emitState = BBStateNotEmitted; + } }; struct InterpVar @@ -163,6 +192,23 @@ struct InterpVar unsigned int global : 1; // Dedicated stack offset throughout method execution unsigned int ILGlobal : 1; // Args and IL locals unsigned int alive : 1; // Used internally by the var offset allocator + + InterpVar(InterpType interpType, CORINFO_CLASS_HANDLE clsHnd, int size) + { + this->interpType = interpType; + this->clsHnd = clsHnd; + this->size = size; + offset = -1; + liveStart = -1; + bbIndex = -1; + indirects = 0; + + callArgs = false; + noCallArgs = false; + global = false; + ILGlobal = false; + alive = false; + } }; struct StackInfo @@ -177,7 +223,7 @@ struct StackInfo // the stack a new var is created. int var; - void Init(StackType type) + StackInfo(StackType type) { this->type = type; clsHnd = NULL; @@ -200,6 +246,14 @@ struct Reloc // Base offset that the relative offset to be embedded in IR applies to int32_t offset; InterpBasicBlock *pTargetBB; + + Reloc(RelocType type, int32_t offset, InterpBasicBlock *pTargetBB, int skip) + { + this->type = type; + this->offset = offset; + this->pTargetBB = pTargetBB; + this->skip = skip; + } }; typedef class ICorJitInfo* COMP_HANDLE; @@ -220,6 +274,10 @@ class InterpCompiler int32_t m_ILCodeSize; int32_t m_currentILOffset; + // This represents a mapping from indexes to pointer sized data. 
During compilation, an + // instruction can request an index for some data (like a MethodDesc pointer), that it + // will then embed in the instruction stream. The data item table will be referenced + // from the interpreter code header during execution. // FIXME during compilation this should be a hashtable for fast lookup of duplicates TArray m_dataItems; int32_t GetDataItemIndex(void* data); @@ -228,6 +286,8 @@ class InterpCompiler int GenerateCode(CORINFO_METHOD_INFO* methodInfo); void* AllocMethodData(size_t numBytes); + // FIXME Mempool allocation currently leaks. We need to add an allocator and then + // free all memory when method is finished compiling. void* AllocMemPool(size_t numBytes); void* AllocMemPool0(size_t numBytes); void* AllocTemporary(size_t numBytes); @@ -258,7 +318,7 @@ class InterpCompiler int m_BBCount = 0; InterpBasicBlock** m_ppOffsetToBB; - InterpBasicBlock* AllocBB(); + InterpBasicBlock* AllocBB(int32_t ilOffset); InterpBasicBlock* GetBB(int32_t ilOffset); void LinkBBs(InterpBasicBlock *from, InterpBasicBlock *to); void UnlinkBBs(InterpBasicBlock *from, InterpBasicBlock *to); @@ -284,7 +344,7 @@ class InterpCompiler void AllocVarOffsetCB(int *pVar, void *pData); int32_t AllocVarOffset(int var, int32_t *pPos); - int32_t GetInterpTypeSize(CORINFO_CLASS_HANDLE clsHnd, InterpType interpType, int32_t *pAlign); + int32_t GetInterpTypeStackSize(CORINFO_CLASS_HANDLE clsHnd, InterpType interpType, int32_t *pAlign); void CreateILVars(); // Stack @@ -326,7 +386,7 @@ class InterpCompiler // Passes int32_t* m_pMethodCode; - int32_t m_methodCodeSize; // in int32_t + int32_t m_methodCodeSize; // code size measured in int32_t slots, instead of bytes void AllocOffsets(); int32_t ComputeCodeSize(); diff --git a/src/coreclr/interpreter/compileropt.cpp b/src/coreclr/interpreter/compileropt.cpp index cdfbd04cabb577..cc188d53101a57 100644 --- a/src/coreclr/interpreter/compileropt.cpp +++ b/src/coreclr/interpreter/compileropt.cpp @@ -18,6 +18,8 @@ int32_t
InterpCompiler::AllocVarOffset(int var, int32_t *pPos) return m_pVars[var].offset; } +// Global vars are variables that are referenced from multiple basic blocks. We reserve +// a dedicated slot for each such variable. int32_t InterpCompiler::AllocGlobalVarOffset(int var) { return AllocVarOffset(var, &m_totalVarsStackSize); @@ -54,8 +56,7 @@ void InterpCompiler::InitializeGlobalVar(int32_t var, int bbIndex) { AllocGlobalVarOffset(var); m_pVars[var].global = true; - if (m_verbose) - printf("alloc global var %d to offset %d\n", var, m_pVars[var].offset); + INTERP_DUMP("alloc global var %d to offset %d\n", var, m_pVars[var].offset); } } @@ -84,8 +85,7 @@ void InterpCompiler::InitializeGlobalVars() { AllocGlobalVarOffset(var); m_pVars[var].global = true; - if (m_verbose) - printf("alloc global var %d to offset %d\n", var, m_pVars[var].offset); + INTERP_DUMP("alloc global var %d to offset %d\n", var, m_pVars[var].offset); } } ForEachInsVar(pIns, (void*)(size_t)pBB->index, &InterpCompiler::InitializeGlobalVarCB); @@ -94,7 +94,11 @@ void InterpCompiler::InitializeGlobalVars() m_totalVarsStackSize = ALIGN_UP_TO(m_totalVarsStackSize, INTERP_STACK_ALIGNMENT); } -// For each call instruction, this method computes its base offset. The base offset is computed as +// In the final codegen, each call instruction will receive a single offset as an argument. At this +// offset all the call arguments will be located. This offset will point into the param area. Vars +// allocated here have special constraints compared to normal local/global vars. +// +// For each call instruction, this method computes its args offset. The call offset is computed as // the max offset of all call offsets on which the call depends. Stack ensures that all call offsets // on which the call depends are calculated before the call in question, by deferring calls from the // last to the first one. 
@@ -167,7 +171,7 @@ void InterpCompiler::EndActiveCall(InterpInst *call) if (callArgs && (*callArgs != -1)) { int32_t var = *callArgs; - while (var != -1) + while (var != CALL_ARGS_TERMINATOR) { AllocVarOffset(var, &baseOffset); callArgs++; @@ -219,8 +223,7 @@ void InterpCompiler::AllocOffsets() InitializeGlobalVars(); - if (m_verbose) - printf("\nAllocating var offsets\n"); + INTERP_DUMP("\nAllocating var offsets\n"); int finalVarsStackSize = m_totalVarsStackSize; @@ -230,8 +233,7 @@ void InterpCompiler::AllocOffsets() InterpInst *pIns; int insIndex = 0; - if (m_verbose) - printf("BB%d\n", pBB->index); + INTERP_DUMP("BB%d\n", pBB->index); // All data structs should be left empty after a bblock iteration assert(m_pActiveVars->GetSize() == 0); @@ -300,11 +302,13 @@ void InterpCompiler::AllocOffsets() if (opcode == INTOP_NOP) continue; +#ifdef DEBUG if (m_verbose) { printf("\tins_index %d\t", insIndex); PrintIns(pIns); } +#endif // Expire source vars. We first mark them as not alive and then compact the array for (int i = 0; i < g_interpOpSVars[opcode]; i++) @@ -344,8 +348,7 @@ void InterpCompiler::AllocOffsets() else if (!m_pVars[var].global && m_pVars[var].offset == -1) { AllocVarOffset(var, &currentOffset); - if (m_verbose) - printf("alloc var %d to offset %d\n", var, m_pVars[var].offset); + INTERP_DUMP("alloc var %d to offset %d\n", var, m_pVars[var].offset); if (currentOffset > finalVarsStackSize) finalVarsStackSize = currentOffset; @@ -364,6 +367,7 @@ void InterpCompiler::AllocOffsets() } } +#ifdef DEBUG if (m_verbose) { printf("active vars:"); @@ -375,6 +379,7 @@ void InterpCompiler::AllocOffsets() } printf("\n"); } +#endif insIndex++; } } diff --git a/src/coreclr/interpreter/eeinterp.cpp b/src/coreclr/interpreter/eeinterp.cpp index 0f9c02b448f1a0..ec534ffa74b44b 100644 --- a/src/coreclr/interpreter/eeinterp.cpp +++ b/src/coreclr/interpreter/eeinterp.cpp @@ -78,7 +78,7 @@ CorJitResult CILInterp::compileMethod(ICorJitInfo* compHnd, return CORJIT_SKIPPED; } - 
InterpCompiler compiler(compHnd, methodInfo, false); + InterpCompiler compiler(compHnd, methodInfo, false /* verbose */); InterpMethod *pMethod = compiler.CompileMethod(); int32_t IRCodeSize; diff --git a/src/coreclr/interpreter/intops.h b/src/coreclr/interpreter/intops.h index 7d0104df22bbb8..fa64b28a971974 100644 --- a/src/coreclr/interpreter/intops.h +++ b/src/coreclr/interpreter/intops.h @@ -54,35 +54,30 @@ static inline bool InterpOpIsCondBranch(int32_t opcode) return opcode >= INTOP_BRFALSE_I4 && opcode <= INTOP_BLT_UN_R8; } -// Helpers identical to ones used by JIT -// FIXME how to consume GET_UNALIGNED_VAL defines from pal as jit ??? -// -//#include "pal_mstypes.h" -//#include "pal_endian.h" - +// Helpers for reading little-endian data from a uint8_t code stream. Values are assembled byte by byte in unsigned types (no pointer casts, no sign extension of partial words); signed variants cast only the final result. inline uint16_t getU2LittleEndian(const uint8_t* ptr) { - return *(uint16_t*)ptr; + return *ptr | *(ptr + 1) << 8; } inline uint32_t getU4LittleEndian(const uint8_t* ptr) { - return *(uint32_t*)ptr; + return (uint32_t)*ptr | (uint32_t)*(ptr + 1) << 8 | (uint32_t)*(ptr + 2) << 16 | (uint32_t)*(ptr + 3) << 24; } inline int16_t getI2LittleEndian(const uint8_t* ptr) { - return *(int16_t*)ptr; + return (int16_t)getU2LittleEndian(ptr); } inline int32_t getI4LittleEndian(const uint8_t* ptr) { - return *(int32_t*)ptr; + return (int32_t)getU4LittleEndian(ptr); } inline int64_t getI8LittleEndian(const uint8_t* ptr) { - return *(int64_t*)ptr; + return (int64_t)((uint64_t)getU4LittleEndian(ptr) | ((uint64_t)getU4LittleEndian(ptr + 4)) << 32); } inline float getR4LittleEndian(const uint8_t* ptr) diff --git a/src/coreclr/vm/interpexec.cpp b/src/coreclr/vm/interpexec.cpp index 75b4dbb66a3132..3f2cf7a65dfbdb 100644 --- a/src/coreclr/vm/interpexec.cpp +++ b/src/coreclr/vm/interpexec.cpp @@ -717,13 +717,12 @@ void InterpExecMethod(InterpMethodContextFrame *pFrame, InterpThreadContext *pTh // Allocate child frame. 
{ - InterpMethodContextFrame *pChildFrame = pFrame->pNextFree; + InterpMethodContextFrame *pChildFrame = pFrame->pNext; if (!pChildFrame) { pChildFrame = (InterpMethodContextFrame*)alloca(sizeof(InterpMethodContextFrame)); - pChildFrame->pNextFree = NULL; - // Not free currently, but will be when allocation attempted. - pFrame->pNextFree = pChildFrame; + pChildFrame->pNext = NULL; + pFrame->pNext = pChildFrame; } pChildFrame->ReInit(pFrame, targetIp, stack + returnOffset, stack + callArgsOffset); pFrame = pChildFrame; diff --git a/src/coreclr/vm/interpexec.h b/src/coreclr/vm/interpexec.h index f7fe2bddb74c40..19d1e12500e082 100644 --- a/src/coreclr/vm/interpexec.h +++ b/src/coreclr/vm/interpexec.h @@ -28,7 +28,7 @@ struct InterpMethodContextFrame int8_t *pStack; int8_t *pRetVal; const int32_t *ip; // This ip is updated only when execution can leave the frame - InterpMethodContextFrame *pNextFree; + InterpMethodContextFrame *pNext; void ReInit(InterpMethodContextFrame *pParent, const int32_t *startIp, int8_t *pRetVal, int8_t *pStack) { From 446b24bc7107389ba4ba08db14c056855a88c05e Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Mon, 17 Mar 2025 23:04:10 +0200 Subject: [PATCH 16/16] Add gc state transition --- src/coreclr/vm/interpexec.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/coreclr/vm/interpexec.cpp b/src/coreclr/vm/interpexec.cpp index 3f2cf7a65dfbdb..58ece1e6d234c2 100644 --- a/src/coreclr/vm/interpexec.cpp +++ b/src/coreclr/vm/interpexec.cpp @@ -698,6 +698,7 @@ void InterpExecMethod(InterpMethodContextFrame *pFrame, InterpThreadContext *pTh MethodDesc *pMD = (MethodDesc*)(targetMethod & ~INTERP_METHOD_DESC_TAG); PCODE code = pMD->GetNativeCode(); if (!code) { + GCX_PREEMP(); pMD->PrepareInitialCode(CallerGCMode::Coop); code = pMD->GetNativeCode(); }