Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bpo-43846: Use less stack for large literals and calls #25403

Merged
merged 6 commits into from
Apr 15, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions Lib/test/test_compile.py
Original file line number Diff line number Diff line change
Expand Up @@ -961,6 +961,32 @@ def test_if_else(self):
def test_binop(self):
self.check_stack_size("x + " * self.N + "x")

def test_list(self):
self.check_stack_size("[" + "x, " * self.N + "x]")

def test_tuple(self):
self.check_stack_size("(" + "x, " * self.N + "x)")

def test_set(self):
self.check_stack_size("{" + "x, " * self.N + "x}")

def test_dict(self):
self.check_stack_size("{" + "x:x, " * self.N + "x:x}")

def test_func_args(self):
self.check_stack_size("f(" + "x, " * self.N + ")")

def test_func_kwargs(self):
kwargs = (f'a{i}=x' for i in range(self.N))
self.check_stack_size("f(" + ", ".join(kwargs) + ")")

def test_func_args(self):
self.check_stack_size("o.m(" + "x, " * self.N + ")")

def test_meth_kwargs(self):
kwargs = (f'a{i}=x' for i in range(self.N))
self.check_stack_size("o.m(" + ", ".join(kwargs) + ")")

def test_func_and(self):
code = "def f(x):\n"
code += " x and x\n" * self.N
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Data stack usage is much reduced for large literal and call expressions.
194 changes: 144 additions & 50 deletions Python/compile.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,30 @@
#define COMP_SETCOMP 2
#define COMP_DICTCOMP 3

/* A soft limit for stack use, to avoid excessive
* memory use for large constants, etc.
*
* The value 30 is plucked out of thin air.
* Code that could use more stack than this is
* rare, so the exact value is unimportant.
*/
#define STACK_USE_GUIDELINE 30

/* If we exceed this limit, it should
* be considered a compiler bug.
* Currently it should be impossible
* to exceed STACK_USE_GUIDELINE * 100,
* as 100 is the maximum parse depth.
* For performance reasons we will
* want to reduce this to a
* few hundred in the future.
*
* NOTE: Whatever MAX_ALLOWED_STACK_USE is
* set to, it should never restrict what Python
* we can write, just how we compile it.
*/
#define MAX_ALLOWED_STACK_USE (STACK_USE_GUIDELINE * 100)

#define IS_TOP_LEVEL_AWAIT(c) ( \
(c->c_flags->cf_flags & PyCF_ALLOW_TOP_LEVEL_AWAIT) \
&& (c->u->u_ste->ste_type == ModuleBlock))
Expand Down Expand Up @@ -1403,7 +1427,7 @@ compiler_addop_name(struct compiler *c, int opcode, PyObject *dict,
*/

static int
compiler_addop_i(struct compiler *c, int opcode, Py_ssize_t oparg)
compiler_addop_i_line(struct compiler *c, int opcode, Py_ssize_t oparg, int lineno)
{
struct instr *i;
int off;
Expand All @@ -1424,10 +1448,22 @@ compiler_addop_i(struct compiler *c, int opcode, Py_ssize_t oparg)
i = &c->u->u_curblock->b_instr[off];
i->i_opcode = opcode;
i->i_oparg = Py_SAFE_DOWNCAST(oparg, Py_ssize_t, int);
i->i_lineno = c->u->u_lineno;
i->i_lineno = lineno;
return 1;
}

static int
compiler_addop_i(struct compiler *c, int opcode, Py_ssize_t oparg)
{
return compiler_addop_i_line(c, opcode, oparg, c->u->u_lineno);
}

static int
compiler_addop_i_noline(struct compiler *c, int opcode, Py_ssize_t oparg)
{
return compiler_addop_i_line(c, opcode, oparg, -1);
}

static int add_jump_to_block(basicblock *b, int opcode, int lineno, basicblock *target)
{
assert(HAS_ARG(opcode));
Expand Down Expand Up @@ -1527,6 +1563,11 @@ compiler_addop_j_noline(struct compiler *c, int opcode, basicblock *b)
return 0; \
}

#define ADDOP_I_NOLINE(C, OP, O) { \
if (!compiler_addop_i_noline((C), (OP), (O))) \
return 0; \
}

#define ADDOP_JUMP(C, OP, O) { \
if (!compiler_addop_j((C), (OP), (O))) \
return 0; \
Expand Down Expand Up @@ -3707,14 +3748,13 @@ starunpack_helper(struct compiler *c, asdl_expr_seq *elts, int pushed,
int build, int add, int extend, int tuple)
{
Py_ssize_t n = asdl_seq_LEN(elts);
Py_ssize_t i, seen_star = 0;
if (n > 2 && are_all_items_const(elts, 0, n)) {
PyObject *folded = PyTuple_New(n);
if (folded == NULL) {
return 0;
}
PyObject *val;
for (i = 0; i < n; i++) {
for (Py_ssize_t i = 0; i < n; i++) {
val = ((expr_ty)asdl_seq_GET(elts, i))->v.Constant.value;
Py_INCREF(val);
PyTuple_SET_ITEM(folded, i, val);
Expand All @@ -3735,38 +3775,16 @@ starunpack_helper(struct compiler *c, asdl_expr_seq *elts, int pushed,
return 1;
}

for (i = 0; i < n; i++) {
int big = n+pushed > STACK_USE_GUIDELINE;
int seen_star = 0;
for (Py_ssize_t i = 0; i < n; i++) {
expr_ty elt = asdl_seq_GET(elts, i);
if (elt->kind == Starred_kind) {
seen_star = 1;
}
}
if (seen_star) {
seen_star = 0;
for (i = 0; i < n; i++) {
expr_ty elt = asdl_seq_GET(elts, i);
if (elt->kind == Starred_kind) {
if (seen_star == 0) {
ADDOP_I(c, build, i+pushed);
seen_star = 1;
}
VISIT(c, expr, elt->v.Starred.value);
ADDOP_I(c, extend, 1);
}
else {
VISIT(c, expr, elt);
if (seen_star) {
ADDOP_I(c, add, 1);
}
}
}
assert(seen_star);
if (tuple) {
ADDOP(c, LIST_TO_TUPLE);
}
}
else {
for (i = 0; i < n; i++) {
if (!seen_star && !big) {
for (Py_ssize_t i = 0; i < n; i++) {
expr_ty elt = asdl_seq_GET(elts, i);
VISIT(c, expr, elt);
}
Expand All @@ -3775,6 +3793,33 @@ starunpack_helper(struct compiler *c, asdl_expr_seq *elts, int pushed,
} else {
ADDOP_I(c, build, n+pushed);
}
return 1;
}
int sequence_built = 0;
if (big) {
ADDOP_I(c, build, pushed);
sequence_built = 1;
}
for (Py_ssize_t i = 0; i < n; i++) {
expr_ty elt = asdl_seq_GET(elts, i);
if (elt->kind == Starred_kind) {
if (sequence_built == 0) {
ADDOP_I(c, build, i+pushed);
sequence_built = 1;
}
VISIT(c, expr, elt->v.Starred.value);
ADDOP_I(c, extend, 1);
}
else {
VISIT(c, expr, elt);
if (sequence_built) {
ADDOP_I(c, add, 1);
}
}
}
assert(sequence_built);
if (tuple) {
ADDOP(c, LIST_TO_TUPLE);
}
return 1;
}
Expand Down Expand Up @@ -3874,7 +3919,8 @@ compiler_subdict(struct compiler *c, expr_ty e, Py_ssize_t begin, Py_ssize_t end
{
Py_ssize_t i, n = end - begin;
PyObject *keys, *key;
if (n > 1 && are_all_items_const(e->v.Dict.keys, begin, end)) {
int big = n*2 > STACK_USE_GUIDELINE;
if (n > 1 && !big && are_all_items_const(e->v.Dict.keys, begin, end)) {
for (i = begin; i < end; i++) {
VISIT(c, expr, (expr_ty)asdl_seq_GET(e->v.Dict.values, i));
}
Expand All @@ -3889,12 +3935,19 @@ compiler_subdict(struct compiler *c, expr_ty e, Py_ssize_t begin, Py_ssize_t end
}
ADDOP_LOAD_CONST_NEW(c, keys);
ADDOP_I(c, BUILD_CONST_KEY_MAP, n);
return 1;
}
else {
for (i = begin; i < end; i++) {
VISIT(c, expr, (expr_ty)asdl_seq_GET(e->v.Dict.keys, i));
VISIT(c, expr, (expr_ty)asdl_seq_GET(e->v.Dict.values, i));
if (big) {
ADDOP_I(c, BUILD_MAP, 0);
}
for (i = begin; i < end; i++) {
VISIT(c, expr, (expr_ty)asdl_seq_GET(e->v.Dict.keys, i));
VISIT(c, expr, (expr_ty)asdl_seq_GET(e->v.Dict.values, i));
if (big) {
ADDOP_I(c, MAP_ADD, 1);
}
}
if (!big) {
ADDOP_I(c, BUILD_MAP, n);
}
return 1;
Expand Down Expand Up @@ -3930,7 +3983,7 @@ compiler_dict(struct compiler *c, expr_ty e)
ADDOP_I(c, DICT_UPDATE, 1);
}
else {
if (elements == 0xFFFF) {
if (elements*2 > STACK_USE_GUIDELINE) {
if (!compiler_subdict(c, e, i - elements, i + 1)) {
return 0;
}
Expand Down Expand Up @@ -4126,11 +4179,15 @@ maybe_optimize_method_call(struct compiler *c, expr_ty e)
/* Check that the call node is an attribute access, and that
the call doesn't have keyword parameters. */
if (meth->kind != Attribute_kind || meth->v.Attribute.ctx != Load ||
asdl_seq_LEN(e->v.Call.keywords))
asdl_seq_LEN(e->v.Call.keywords)) {
return -1;

/* Check that there are no *varargs types of arguments. */
}
/* Check that there aren't too many arguments */
argsl = asdl_seq_LEN(args);
if (argsl >= STACK_USE_GUIDELINE) {
return -1;
}
/* Check that there are no *varargs types of arguments. */
for (i = 0; i < argsl; i++) {
expr_ty elt = asdl_seq_GET(args, i);
if (elt->kind == Starred_kind) {
Expand Down Expand Up @@ -4192,9 +4249,29 @@ compiler_call(struct compiler *c, expr_ty e)
static int
compiler_joined_str(struct compiler *c, expr_ty e)
{
VISIT_SEQ(c, expr, e->v.JoinedStr.values);
if (asdl_seq_LEN(e->v.JoinedStr.values) != 1)
ADDOP_I(c, BUILD_STRING, asdl_seq_LEN(e->v.JoinedStr.values));

Py_ssize_t value_count = asdl_seq_LEN(e->v.JoinedStr.values);
if (value_count > STACK_USE_GUIDELINE) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe use >= for consistency with earlier locations?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've used > throughout. It seems to fit better with STACK_USE_GUIDELINE being the maximum stack use.

ADDOP_LOAD_CONST_NEW(c, _PyUnicode_FromASCII("", 0));
PyObject *join = _PyUnicode_FromASCII("join", 4);
if (join == NULL) {
return 0;
}
ADDOP_NAME(c, LOAD_METHOD, join, names);
Py_DECREF(join);
ADDOP_I(c, BUILD_LIST, 0);
for (Py_ssize_t i = 0; i < asdl_seq_LEN(e->v.JoinedStr.values); i++) {
VISIT(c, expr, asdl_seq_GET(e->v.JoinedStr.values, i));
ADDOP_I(c, LIST_APPEND, 1);
}
ADDOP_I(c, CALL_METHOD, 1);
}
else {
VISIT_SEQ(c, expr, e->v.JoinedStr.values);
if (asdl_seq_LEN(e->v.JoinedStr.values) != 1) {
ADDOP_I(c, BUILD_STRING, asdl_seq_LEN(e->v.JoinedStr.values));
}
}
return 1;
}

Expand Down Expand Up @@ -4251,7 +4328,8 @@ compiler_subkwargs(struct compiler *c, asdl_keyword_seq *keywords, Py_ssize_t be
keyword_ty kw;
PyObject *keys, *key;
assert(n > 0);
if (n > 1) {
int big = n*2 > STACK_USE_GUIDELINE;
if (n > 1 && !big) {
for (i = begin; i < end; i++) {
kw = asdl_seq_GET(keywords, i);
VISIT(c, expr, kw->value);
Expand All @@ -4267,14 +4345,20 @@ compiler_subkwargs(struct compiler *c, asdl_keyword_seq *keywords, Py_ssize_t be
}
ADDOP_LOAD_CONST_NEW(c, keys);
ADDOP_I(c, BUILD_CONST_KEY_MAP, n);
return 1;
}
else {
/* a for loop only executes once */
for (i = begin; i < end; i++) {
kw = asdl_seq_GET(keywords, i);
ADDOP_LOAD_CONST(c, kw->arg);
VISIT(c, expr, kw->value);
if (big) {
ADDOP_I_NOLINE(c, BUILD_MAP, 0);
}
for (i = begin; i < end; i++) {
kw = asdl_seq_GET(keywords, i);
ADDOP_LOAD_CONST(c, kw->arg);
VISIT(c, expr, kw->value);
if (big) {
ADDOP_I_NOLINE(c, MAP_ADD, 1);
}
}
if (!big) {
ADDOP_I(c, BUILD_MAP, n);
}
return 1;
Expand All @@ -4296,6 +4380,9 @@ compiler_call_helper(struct compiler *c,
nelts = asdl_seq_LEN(args);
nkwelts = asdl_seq_LEN(keywords);

if (nelts + nkwelts*2 > STACK_USE_GUIDELINE) {
goto ex_call;
}
for (i = 0; i < nelts; i++) {
expr_ty elt = asdl_seq_GET(args, i);
if (elt->kind == Starred_kind) {
Expand Down Expand Up @@ -6603,6 +6690,13 @@ makecode(struct compiler *c, struct assembler *a, PyObject *consts)
Py_DECREF(consts);
goto error;
}
if (maxdepth > MAX_ALLOWED_STACK_USE) {
PyErr_Format(PyExc_SystemError,
"excessive stack use: stack is %d deep",
maxdepth);
Py_DECREF(consts);
goto error;
}
co = PyCode_NewWithPosOnlyArgs(posonlyargcount+posorkeywordargcount,
posonlyargcount, kwonlyargcount, nlocals_int,
maxdepth, flags, a->a_bytecode, consts, names,
Expand Down