@@ -307,7 +307,7 @@ uop_dealloc(_PyUOpExecutorObject *self) {
307307
308308static const char *
309309uop_name (int index ) {
310- if (index < EXIT_TRACE ) {
310+ if (index < 256 ) {
311311 return _PyOpcode_OpName [index ];
312312 }
313313 return _PyOpcode_uop_name [index ];
@@ -316,9 +316,9 @@ uop_name(int index) {
316316static Py_ssize_t
317317uop_len (_PyUOpExecutorObject * self )
318318{
319- int count = 1 ;
319+ int count = 0 ;
320320 for (; count < _Py_UOP_MAX_TRACE_LENGTH ; count ++ ) {
321- if (self -> trace [count - 1 ].opcode == EXIT_TRACE ) {
321+ if (self -> trace [count ].opcode == 0 ) {
322322 break ;
323323 }
324324 }
@@ -328,28 +328,26 @@ uop_len(_PyUOpExecutorObject *self)
328328static PyObject *
329329uop_item (_PyUOpExecutorObject * self , Py_ssize_t index )
330330{
331- for (int i = 0 ; i < _Py_UOP_MAX_TRACE_LENGTH ; i ++ ) {
332- if (self -> trace [i ].opcode == EXIT_TRACE ) {
333- break ;
334- }
335- if (i != index ) {
336- continue ;
337- }
338- const char * name = uop_name (self -> trace [i ].opcode );
339- PyObject * oname = _PyUnicode_FromASCII (name , strlen (name ));
340- if (oname == NULL ) {
341- return NULL ;
342- }
343- PyObject * operand = PyLong_FromUnsignedLongLong (self -> trace [i ].operand );
344- if (operand == NULL ) {
345- Py_DECREF (oname );
346- return NULL ;
347- }
348- PyObject * args [2 ] = { oname , operand };
349- return _PyTuple_FromArraySteal (args , 2 );
331+ Py_ssize_t len = uop_len (self );
332+ if (index < 0 || index >= len ) {
333+ PyErr_SetNone (PyExc_IndexError );
334+ return NULL ;
350335 }
351- PyErr_SetNone (PyExc_IndexError );
352- return NULL ;
336+ const char * name = uop_name (self -> trace [index ].opcode );
337+ if (name == NULL ) {
338+ name = "<nil>" ;
339+ }
340+ PyObject * oname = _PyUnicode_FromASCII (name , strlen (name ));
341+ if (oname == NULL ) {
342+ return NULL ;
343+ }
344+ PyObject * operand = PyLong_FromUnsignedLongLong (self -> trace [index ].operand );
345+ if (operand == NULL ) {
346+ Py_DECREF (oname );
347+ return NULL ;
348+ }
349+ PyObject * args [2 ] = { oname , operand };
350+ return _PyTuple_FromArraySteal (args , 2 );
353351}
354352
355353PySequenceMethods uop_as_sequence = {
@@ -372,12 +370,13 @@ translate_bytecode_to_trace(
372370 PyCodeObject * code ,
373371 _Py_CODEUNIT * instr ,
374372 _PyUOpInstruction * trace ,
375- int max_length )
373+ int buffer_size )
376374{
377375#ifdef Py_DEBUG
378376 _Py_CODEUNIT * initial_instr = instr ;
379377#endif
380378 int trace_length = 0 ;
379+ int max_length = buffer_size ;
381380
382381#ifdef Py_DEBUG
383382 char * uop_debug = Py_GETENV ("PYTHONUOPSDEBUG" );
@@ -401,6 +400,14 @@ translate_bytecode_to_trace(
401400 trace[trace_length].operand = (OPERAND); \
402401 trace_length++;
403402
403+ #define ADD_TO_STUB (INDEX , OPCODE , OPERAND ) \
404+ DPRINTF(2, " ADD_TO_STUB(%d, %s, %" PRIu64 ")\n", \
405+ (INDEX), \
406+ (OPCODE) < 256 ? _PyOpcode_OpName[(OPCODE)] : _PyOpcode_uop_name[(OPCODE)], \
407+ (uint64_t)(OPERAND)); \
408+ trace[(INDEX)].opcode = (OPCODE); \
409+ trace[(INDEX)].operand = (OPERAND);
410+
404411 DPRINTF (4 ,
405412 "Optimizing %s (%s:%d) at byte offset %ld\n" ,
406413 PyUnicode_AsUTF8 (code -> co_qualname ),
@@ -409,7 +416,7 @@ translate_bytecode_to_trace(
409416 2 * (long )(initial_instr - (_Py_CODEUNIT * )code -> co_code_adaptive ));
410417
411418 for (;;) {
412- ADD_TO_TRACE (SAVE_IP , ( int )( instr - (_Py_CODEUNIT * )code -> co_code_adaptive ) );
419+ ADD_TO_TRACE (SAVE_IP , instr - (_Py_CODEUNIT * )code -> co_code_adaptive );
413420 int opcode = instr -> op .code ;
414421 int oparg = instr -> op .arg ;
415422 int extras = 0 ;
@@ -420,12 +427,35 @@ translate_bytecode_to_trace(
420427 oparg = (oparg << 8 ) | instr -> op .arg ;
421428 }
422429 if (opcode == ENTER_EXECUTOR ) {
423- _PyExecutorObject * executor = (_PyExecutorObject * )code -> co_executors -> executors [oparg & 255 ];
430+ _PyExecutorObject * executor =
431+ (_PyExecutorObject * )code -> co_executors -> executors [oparg & 255 ];
424432 opcode = executor -> vm_data .opcode ;
425433 DPRINTF (2 , " * ENTER_EXECUTOR -> %s\n" , _PyOpcode_OpName [opcode ]);
426434 oparg = (oparg & 0xffffff00 ) | executor -> vm_data .oparg ;
427435 }
428436 switch (opcode ) {
437+
438+ case POP_JUMP_IF_FALSE :
439+ case POP_JUMP_IF_TRUE :
440+ {
441+ // Assume jump unlikely (TODO: handle jump likely case)
442+ // Reserve 5 entries (1 here, 2 stub, plus SAVE_IP + EXIT_TRACE)
443+ if (trace_length + 5 > max_length ) {
444+ DPRINTF (1 , "Ran out of space for POP_JUMP_IF_FALSE\n" );
445+ goto done ;
446+ }
447+ _Py_CODEUNIT * target_instr =
448+ instr + 1 + _PyOpcode_Caches [_PyOpcode_Deopt [opcode ]] + oparg ;
449+ max_length -= 2 ; // Really the start of the stubs
450+ int uopcode = opcode == POP_JUMP_IF_TRUE ?
451+ _POP_JUMP_IF_TRUE : _POP_JUMP_IF_FALSE ;
452+ ADD_TO_TRACE (uopcode , max_length );
453+ ADD_TO_STUB (max_length , SAVE_IP ,
454+ target_instr - (_Py_CODEUNIT * )code -> co_code_adaptive );
455+ ADD_TO_STUB (max_length + 1 , EXIT_TRACE , 0 );
456+ break ;
457+ }
458+
429459 default :
430460 {
431461 const struct opcode_macro_expansion * expansion = & _PyOpcode_macro_expansion [opcode ];
@@ -503,6 +533,30 @@ translate_bytecode_to_trace(
503533 code -> co_firstlineno ,
504534 2 * (long )(initial_instr - (_Py_CODEUNIT * )code -> co_code_adaptive ),
505535 trace_length );
536+ if (max_length < buffer_size && trace_length < max_length ) {
537+ // Move the stubs back to be immediately after the main trace
538+ // (which ends at trace_length)
539+ DPRINTF (2 ,
540+ "Moving %d stub uops back by %d\n" ,
541+ buffer_size - max_length ,
542+ max_length - trace_length );
543+ memmove (trace + trace_length ,
544+ trace + max_length ,
545+ (buffer_size - max_length ) * sizeof (_PyUOpInstruction ));
546+ // Patch up the jump targets
547+ for (int i = 0 ; i < trace_length ; i ++ ) {
548+ if (trace [i ].opcode == _POP_JUMP_IF_FALSE ||
549+ trace [i ].opcode == _POP_JUMP_IF_TRUE )
550+ {
551+ int target = trace [i ].operand ;
552+ if (target >= max_length ) {
553+ target += trace_length - max_length ;
554+ trace [i ].operand = target ;
555+ }
556+ }
557+ }
558+ trace_length += buffer_size - max_length ;
559+ }
506560 return trace_length ;
507561 }
508562 else {
@@ -539,6 +593,9 @@ uop_optimize(
539593 }
540594 executor -> base .execute = _PyUopExecute ;
541595 memcpy (executor -> trace , trace , trace_length * sizeof (_PyUOpInstruction ));
596+ if (trace_length < _Py_UOP_MAX_TRACE_LENGTH ) {
597+ executor -> trace [trace_length ].opcode = 0 ; // Sentinel
598+ }
542599 * exec_ptr = (_PyExecutorObject * )executor ;
543600 return 1 ;
544601}
0 commit comments