Skip to content

Commit

Permalink
gh-112287: Speed up Tier 2 (uop) interpreter a little (#112286)
Browse files Browse the repository at this point in the history
This makes the Tier 2 interpreter a little faster.
I calculated the speedup to be about 3%,
though I hesitate to claim an exact number.

This starts by doubling the trace size limit (to 512),
making it more likely that loops fit in a trace.

The rest of the approach is to only load
`oparg` and `operand` in cases that use them.
The code generator knows when these are used.

For `oparg`, it will conditionally emit
```
oparg = CURRENT_OPARG();
```
at the top of the case block.
(The `oparg` variable may be referenced multiple times
by the instruction's code block, so it must be in a variable.)

For `operand`, it will use `CURRENT_OPERAND()` directly
instead of referencing the `operand` variable,
which no longer exists.
(There is only one place where this will be used.)
  • Loading branch information
gvanrossum authored Nov 20, 2023
1 parent c4c6321 commit 8deb8bc
Show file tree
Hide file tree
Showing 7 changed files with 131 additions and 33 deletions.
2 changes: 1 addition & 1 deletion Include/internal/pycore_uops.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ extern "C" {

#include "pycore_frame.h" // _PyInterpreterFrame

#define _Py_UOP_MAX_TRACE_LENGTH 256
#define _Py_UOP_MAX_TRACE_LENGTH 512

typedef struct {
uint16_t opcode;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Slightly optimize the Tier 2 (uop) interpreter by only loading ``oparg`` and
``operand`` when needed. Also double the trace size limit again, to 512 this
time.
16 changes: 7 additions & 9 deletions Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -994,21 +994,18 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int

OPT_STAT_INC(traces_executed);
_PyUOpInstruction *next_uop = current_executor->trace;
uint64_t operand;
#ifdef Py_STATS
uint64_t trace_uop_execution_counter = 0;
#endif

for (;;) {
opcode = next_uop->opcode;
oparg = next_uop->oparg;
operand = next_uop->operand;
DPRINTF(3,
"%4d: uop %s, oparg %d, operand %" PRIu64 ", target %d, stack_level %d\n",
(int)(next_uop - current_executor->trace),
_PyUopName(opcode),
oparg,
operand,
next_uop->oparg,
next_uop->operand,
next_uop->target,
(int)(stack_pointer - _PyFrame_Stackbase(frame)));
next_uop++;
Expand All @@ -1025,8 +1022,9 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
default:
#ifdef Py_DEBUG
{
fprintf(stderr, "Unknown uop %d, oparg %d, operand %" PRIu64 "\n",
opcode, oparg, operand);
fprintf(stderr, "Unknown uop %d, oparg %d, operand %" PRIu64 " @ %d\n",
opcode, next_uop[-1].oparg, next_uop[-1].operand,
(int)(next_uop - current_executor->trace - 1));
Py_FatalError("Unknown uop");
}
#else
Expand Down Expand Up @@ -1055,7 +1053,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
STACK_SHRINK(1);
error_tier_two:
DPRINTF(2, "Error: [Uop %d (%s), oparg %d, operand %" PRIu64 ", target %d @ %d]\n",
opcode, _PyUopName(opcode), oparg, operand, next_uop[-1].target,
opcode, _PyUopName(opcode), next_uop[-1].oparg, next_uop[-1].operand, next_uop[-1].target,
(int)(next_uop - current_executor->trace - 1));
OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
frame->return_offset = 0; // Don't leave this random
Expand All @@ -1068,7 +1066,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
// On DEOPT_IF we just repeat the last instruction.
// This presumes nothing was popped from the stack (nor pushed).
DPRINTF(2, "DEOPT: [Uop %d (%s), oparg %d, operand %" PRIu64 ", target %d @ %d]\n",
opcode, _PyUopName(opcode), oparg, operand, next_uop[-1].target,
opcode, _PyUopName(opcode), next_uop[-1].oparg, next_uop[-1].operand, next_uop[-1].target,
(int)(next_uop - current_executor->trace - 1));
OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
UOP_STAT_INC(opcode, miss);
Expand Down
4 changes: 4 additions & 0 deletions Python/ceval_macros.h
Original file line number Diff line number Diff line change
Expand Up @@ -397,3 +397,7 @@ stack_pointer = _PyFrame_GetStackPointer(frame);
#define GOTO_TIER_TWO() goto enter_tier_two;

#define GOTO_TIER_ONE() goto exit_trace;

/* Accessors for the fields of the uop that is currently being executed.
 * The Tier 2 dispatch loop reads the opcode from `next_uop` and then
 * increments `next_uop` before the instruction body runs, so the current
 * instruction is at index -1.  Both macros require a `next_uop` variable
 * to be in scope at the point of use. */
#define CURRENT_OPARG() (next_uop[-1].oparg)

#define CURRENT_OPERAND() (next_uop[-1].operand)
Loading

0 comments on commit 8deb8bc

Please sign in to comment.