Skip to content

Commit c491162

Browse files
committed
gh-108113: optimize ASTs in ast.parse/ast.literal_eval/compile(..., flags=ast.PyCF_ONLY_AST)
1 parent cc58ec9 commit c491162

File tree

7 files changed

+99
-38
lines changed

7 files changed

+99
-38
lines changed

Doc/library/ast.rst

+8-2
Original file line numberDiff line numberDiff line change
@@ -2122,10 +2122,10 @@ Async and await
21222122
Apart from the node classes, the :mod:`ast` module defines these utility functions
21232123
and classes for traversing abstract syntax trees:
21242124

2125-
.. function:: parse(source, filename='<unknown>', mode='exec', *, type_comments=False, feature_version=None)
2125+
.. function:: parse(source, filename='<unknown>', mode='exec', *, type_comments=False, feature_version=None, optimize=-1)
21262126

21272127
Parse the source into an AST node. Equivalent to ``compile(source,
2128-
filename, mode, ast.PyCF_ONLY_AST)``.
2128+
filename, mode, flags=ast.PyCF_ONLY_AST, optimize=optimize)``.
21292129

21302130
If ``type_comments=True`` is given, the parser is modified to check
21312131
and return type comments as specified by :pep:`484` and :pep:`526`.
@@ -2172,6 +2172,10 @@ and classes for traversing abstract syntax trees:
21722172
.. versionchanged:: 3.13
21732173
The minimum supported version for feature_version is now (3,7)
21742174

2175+
The output AST is now optimized with constant folding.
2176+
The ``optimize`` argument was added to control additional
2177+
optimizations.
2178+
21752179

21762180
.. function:: unparse(ast_obj)
21772181

@@ -2229,6 +2233,8 @@ and classes for traversing abstract syntax trees:
22292233
.. versionchanged:: 3.10
22302234
For string inputs, leading spaces and tabs are now stripped.
22312235

2236+
.. versionchanged:: 3.13
2237+
This function now understands and collapses const expressions.
22322238

22332239
.. function:: get_docstring(node, clean=True)
22342240

Doc/whatsnew/3.13.rst

+14
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,10 @@ Other Language Changes
8585
This change will affect tools using docstrings, like :mod:`doctest`.
8686
(Contributed by Inada Naoki in :gh:`81283`.)
8787

88+
* The :func:`compile` built-in no longer ignores the ``optimize`` argument
89+
when called with the ``ast.PyCF_ONLY_AST`` flag.
90+
(Contributed by Irit Katriel in :gh:`108113`.)
91+
8892
New Modules
8993
===========
9094

@@ -94,6 +98,16 @@ New Modules
9498
Improved Modules
9599
================
96100

101+
ast
102+
---
103+
104+
* :func:`ast.parse` and :func:`ast.literal_eval` now perform constant folding
105+
and other AST optimizations. This means that ASTs are more concise, and
106+
:func:`ast.literal_eval` understands and collapses const expressions.
107+
:func:`ast.parse` also accepts a new optional argument ``optimize``, which
108+
it forwards to the :func:`compile` built-in.
109+
(Contributed by Irit Katriel in :gh:`108113`.)
110+
97111
array
98112
-----
99113

Lib/ast.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232

3333

3434
def parse(source, filename='<unknown>', mode='exec', *,
35-
type_comments=False, feature_version=None):
35+
type_comments=False, feature_version=None, optimize=-1):
3636
"""
3737
Parse the source into an AST node.
3838
Equivalent to compile(source, filename, mode, PyCF_ONLY_AST).
@@ -50,7 +50,7 @@ def parse(source, filename='<unknown>', mode='exec', *,
5050
feature_version = minor
5151
# Else it should be an int giving the minor version for 3.x.
5252
return compile(source, filename, mode, flags,
53-
_feature_version=feature_version)
53+
_feature_version=feature_version, optimize=optimize)
5454

5555

5656
def literal_eval(node_or_string):
@@ -63,7 +63,7 @@ def literal_eval(node_or_string):
6363
Caution: A complex expression can overflow the C stack and cause a crash.
6464
"""
6565
if isinstance(node_or_string, str):
66-
node_or_string = parse(node_or_string.lstrip(" \t"), mode='eval')
66+
node_or_string = parse(node_or_string.lstrip(" \t"), mode='eval', optimize=0)
6767
if isinstance(node_or_string, Expression):
6868
node_or_string = node_or_string.body
6969
def _raise_malformed_node(node):

Lib/test/test_ast.py

+44-32
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@
2121
def to_tuple(t):
2222
if t is None or isinstance(t, (str, int, complex)) or t is Ellipsis:
2323
return t
24-
elif isinstance(t, list):
25-
return [to_tuple(e) for e in t]
24+
elif isinstance(t, (list, tuple)):
25+
return type(t)([to_tuple(e) for e in t])
2626
result = [t.__class__.__name__]
2727
if hasattr(t, 'lineno') and hasattr(t, 'col_offset'):
2828
result.append((t.lineno, t.col_offset))
@@ -274,7 +274,7 @@ def to_tuple(t):
274274
# Tuple
275275
"1,2,3",
276276
# Tuple
277-
"(1,2,3)",
277+
"(1,x,3)",
278278
# Empty tuple
279279
"()",
280280
# Combination
@@ -357,6 +357,15 @@ def test_ast_validation(self):
357357
tree = ast.parse(snippet)
358358
compile(tree, '<string>', 'exec')
359359

360+
def test_optimization_levels(self):
361+
cases = [(-1, __debug__), (0, True), (1, False), (2, False)]
362+
for (optval, expected) in cases:
363+
with self.subTest(optval=optval, expected=expected):
364+
res = ast.parse("__debug__", optimize=optval)
365+
self.assertIsInstance(res.body[0], ast.Expr)
366+
self.assertIsInstance(res.body[0].value, ast.Constant)
367+
self.assertEqual(res.body[0].value.value, expected)
368+
360369
def test_invalid_position_information(self):
361370
invalid_linenos = [
362371
(10, 1), (-10, -11), (10, -11), (-5, -2), (-5, 1)
@@ -948,7 +957,7 @@ def bad_normalize(*args):
948957
self.assertRaises(TypeError, ast.parse, '\u03D5')
949958

950959
def test_issue18374_binop_col_offset(self):
951-
tree = ast.parse('4+5+6+7')
960+
tree = ast.parse('a+b+c+d')
952961
parent_binop = tree.body[0].value
953962
child_binop = parent_binop.left
954963
grandchild_binop = child_binop.left
@@ -959,7 +968,7 @@ def test_issue18374_binop_col_offset(self):
959968
self.assertEqual(grandchild_binop.col_offset, 0)
960969
self.assertEqual(grandchild_binop.end_col_offset, 3)
961970

962-
tree = ast.parse('4+5-\\\n 6-7')
971+
tree = ast.parse('a+b-\\\n c-d')
963972
parent_binop = tree.body[0].value
964973
child_binop = parent_binop.left
965974
grandchild_binop = child_binop.left
@@ -1266,13 +1275,14 @@ def test_dump_incomplete(self):
12661275
)
12671276

12681277
def test_copy_location(self):
1269-
src = ast.parse('1 + 1', mode='eval')
1278+
src = ast.parse('x + 1', mode='eval')
12701279
src.body.right = ast.copy_location(ast.Constant(2), src.body.right)
12711280
self.assertEqual(ast.dump(src, include_attributes=True),
1272-
'Expression(body=BinOp(left=Constant(value=1, lineno=1, col_offset=0, '
1273-
'end_lineno=1, end_col_offset=1), op=Add(), right=Constant(value=2, '
1274-
'lineno=1, col_offset=4, end_lineno=1, end_col_offset=5), lineno=1, '
1275-
'col_offset=0, end_lineno=1, end_col_offset=5))'
1281+
"Expression(body=BinOp(left=Name(id='x', ctx=Load(), lineno=1, "
1282+
"col_offset=0, end_lineno=1, end_col_offset=1), op=Add(), "
1283+
"right=Constant(value=2, lineno=1, col_offset=4, end_lineno=1, "
1284+
"end_col_offset=5), lineno=1, col_offset=0, end_lineno=1, "
1285+
"end_col_offset=5))"
12761286
)
12771287
src = ast.Call(col_offset=1, lineno=1, end_lineno=1, end_col_offset=1)
12781288
new = ast.copy_location(src, ast.Call(col_offset=None, lineno=None))
@@ -1302,20 +1312,22 @@ def test_fix_missing_locations(self):
13021312
)
13031313

13041314
def test_increment_lineno(self):
1305-
src = ast.parse('1 + 1', mode='eval')
1315+
src = ast.parse('x + 1', mode='eval')
13061316
self.assertEqual(ast.increment_lineno(src, n=3), src)
13071317
self.assertEqual(ast.dump(src, include_attributes=True),
1308-
'Expression(body=BinOp(left=Constant(value=1, lineno=4, col_offset=0, '
1309-
'end_lineno=4, end_col_offset=1), op=Add(), right=Constant(value=1, '
1318+
'Expression(body=BinOp(left=Name(id=\'x\', ctx=Load(), '
1319+
'lineno=4, col_offset=0, end_lineno=4, end_col_offset=1), '
1320+
'op=Add(), right=Constant(value=1, '
13101321
'lineno=4, col_offset=4, end_lineno=4, end_col_offset=5), lineno=4, '
13111322
'col_offset=0, end_lineno=4, end_col_offset=5))'
13121323
)
13131324
# issue10869: do not increment lineno of root twice
1314-
src = ast.parse('1 + 1', mode='eval')
1325+
src = ast.parse('y + 2', mode='eval')
13151326
self.assertEqual(ast.increment_lineno(src.body, n=3), src.body)
13161327
self.assertEqual(ast.dump(src, include_attributes=True),
1317-
'Expression(body=BinOp(left=Constant(value=1, lineno=4, col_offset=0, '
1318-
'end_lineno=4, end_col_offset=1), op=Add(), right=Constant(value=1, '
1328+
'Expression(body=BinOp(left=Name(id=\'y\', ctx=Load(), '
1329+
'lineno=4, col_offset=0, end_lineno=4, end_col_offset=1), '
1330+
'op=Add(), right=Constant(value=2, '
13191331
'lineno=4, col_offset=4, end_lineno=4, end_col_offset=5), lineno=4, '
13201332
'col_offset=0, end_lineno=4, end_col_offset=5))'
13211333
)
@@ -1446,9 +1458,9 @@ def test_literal_eval(self):
14461458
self.assertEqual(ast.literal_eval('+3.25'), 3.25)
14471459
self.assertEqual(ast.literal_eval('-3.25'), -3.25)
14481460
self.assertEqual(repr(ast.literal_eval('-0.0')), '-0.0')
1449-
self.assertRaises(ValueError, ast.literal_eval, '++6')
1450-
self.assertRaises(ValueError, ast.literal_eval, '+True')
1451-
self.assertRaises(ValueError, ast.literal_eval, '2+3')
1461+
self.assertEqual(ast.literal_eval('++6'), 6)
1462+
self.assertEqual(ast.literal_eval('+True'), 1)
1463+
self.assertEqual(ast.literal_eval('2+3'), 5)
14521464

14531465
def test_literal_eval_str_int_limit(self):
14541466
with support.adjust_int_max_str_digits(4000):
@@ -1473,11 +1485,11 @@ def test_literal_eval_complex(self):
14731485
self.assertEqual(ast.literal_eval('3.25-6.75j'), 3.25-6.75j)
14741486
self.assertEqual(ast.literal_eval('-3.25-6.75j'), -3.25-6.75j)
14751487
self.assertEqual(ast.literal_eval('(3+6j)'), 3+6j)
1476-
self.assertRaises(ValueError, ast.literal_eval, '-6j+3')
1477-
self.assertRaises(ValueError, ast.literal_eval, '-6j+3j')
1478-
self.assertRaises(ValueError, ast.literal_eval, '3+-6j')
1479-
self.assertRaises(ValueError, ast.literal_eval, '3+(0+6j)')
1480-
self.assertRaises(ValueError, ast.literal_eval, '-(3+6j)')
1488+
self.assertEqual(ast.literal_eval('-6j+3'), 3-6j)
1489+
self.assertEqual(ast.literal_eval('-6j+3j'), -3j)
1490+
self.assertEqual(ast.literal_eval('3+-6j'), 3-6j)
1491+
self.assertEqual(ast.literal_eval('3+(0+6j)'), 3+6j)
1492+
self.assertEqual(ast.literal_eval('-(3+6j)'), -3-6j)
14811493

14821494
def test_literal_eval_malformed_dict_nodes(self):
14831495
malformed = ast.Dict(keys=[ast.Constant(1), ast.Constant(2)], values=[ast.Constant(3)])
@@ -1494,7 +1506,7 @@ def test_literal_eval_trailing_ws(self):
14941506
def test_literal_eval_malformed_lineno(self):
14951507
msg = r'malformed node or string on line 3:'
14961508
with self.assertRaisesRegex(ValueError, msg):
1497-
ast.literal_eval("{'a': 1,\n'b':2,\n'c':++3,\n'd':4}")
1509+
ast.literal_eval("{'a': 1,\n'b':2,\n'c':++x,\n'd':4}")
14981510

14991511
node = ast.UnaryOp(
15001512
ast.UAdd(), ast.UnaryOp(ast.UAdd(), ast.Constant(6)))
@@ -2265,7 +2277,7 @@ def test_load_const(self):
22652277
consts)
22662278

22672279
def test_literal_eval(self):
2268-
tree = ast.parse("1 + 2")
2280+
tree = ast.parse("x + 2")
22692281
binop = tree.body[0].value
22702282

22712283
new_left = ast.Constant(value=10)
@@ -2479,14 +2491,14 @@ def test_slices(self):
24792491

24802492
def test_binop(self):
24812493
s = dedent('''
2482-
(1 * 2 + (3 ) +
2494+
(1 * x + (3 ) +
24832495
4
24842496
)
24852497
''').strip()
24862498
binop = self._parse_value(s)
24872499
self._check_end_pos(binop, 2, 6)
24882500
self._check_content(s, binop.right, '4')
2489-
self._check_content(s, binop.left, '1 * 2 + (3 )')
2501+
self._check_content(s, binop.left, '1 * x + (3 )')
24902502
self._check_content(s, binop.left.right, '3')
24912503

24922504
def test_boolop(self):
@@ -3039,7 +3051,7 @@ def main():
30393051
('Module', [('FunctionDef', (1, 0, 1, 38), 'f', ('arguments', [], [], None, [], [], None, []), [('Pass', (1, 34, 1, 38))], [], None, None, [('TypeVar', (1, 6, 1, 19), 'T', ('Tuple', (1, 9, 1, 19), [('Name', (1, 10, 1, 13), 'int', ('Load',)), ('Name', (1, 15, 1, 18), 'str', ('Load',))], ('Load',))), ('TypeVarTuple', (1, 21, 1, 24), 'Ts'), ('ParamSpec', (1, 26, 1, 29), 'P')])], []),
30403052
]
30413053
single_results = [
3042-
('Interactive', [('Expr', (1, 0, 1, 3), ('BinOp', (1, 0, 1, 3), ('Constant', (1, 0, 1, 1), 1, None), ('Add',), ('Constant', (1, 2, 1, 3), 2, None)))]),
3054+
('Interactive', [('Expr', (1, 0, 1, 3), ('Constant', (1, 0, 1, 3), 3, None))]),
30433055
]
30443056
eval_results = [
30453057
('Expression', ('Constant', (1, 0, 1, 4), None, None)),
@@ -3073,9 +3085,9 @@ def main():
30733085
('Expression', ('Name', (1, 0, 1, 1), 'v', ('Load',))),
30743086
('Expression', ('List', (1, 0, 1, 7), [('Constant', (1, 1, 1, 2), 1, None), ('Constant', (1, 3, 1, 4), 2, None), ('Constant', (1, 5, 1, 6), 3, None)], ('Load',))),
30753087
('Expression', ('List', (1, 0, 1, 2), [], ('Load',))),
3076-
('Expression', ('Tuple', (1, 0, 1, 5), [('Constant', (1, 0, 1, 1), 1, None), ('Constant', (1, 2, 1, 3), 2, None), ('Constant', (1, 4, 1, 5), 3, None)], ('Load',))),
3077-
('Expression', ('Tuple', (1, 0, 1, 7), [('Constant', (1, 1, 1, 2), 1, None), ('Constant', (1, 3, 1, 4), 2, None), ('Constant', (1, 5, 1, 6), 3, None)], ('Load',))),
3078-
('Expression', ('Tuple', (1, 0, 1, 2), [], ('Load',))),
3088+
('Expression', ('Constant', (1, 0, 1, 5), (1, 2, 3), None)),
3089+
('Expression', ('Tuple', (1, 0, 1, 7), [('Constant', (1, 1, 1, 2), 1, None), ('Name', (1, 3, 1, 4), 'x', ('Load',)), ('Constant', (1, 5, 1, 6), 3, None)], ('Load',))),
3090+
('Expression', ('Constant', (1, 0, 1, 2), (), None)),
30793091
('Expression', ('Call', (1, 0, 1, 17), ('Attribute', (1, 0, 1, 7), ('Attribute', (1, 0, 1, 5), ('Attribute', (1, 0, 1, 3), ('Name', (1, 0, 1, 1), 'a', ('Load',)), 'b', ('Load',)), 'c', ('Load',)), 'd', ('Load',)), [('Subscript', (1, 8, 1, 16), ('Attribute', (1, 8, 1, 11), ('Name', (1, 8, 1, 9), 'a', ('Load',)), 'b', ('Load',)), ('Slice', (1, 12, 1, 15), ('Constant', (1, 12, 1, 13), 1, None), ('Constant', (1, 14, 1, 15), 2, None), None), ('Load',))], [])),
30803092
]
30813093
main()

Lib/test/test_builtin.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -372,7 +372,7 @@ def f(): """doc"""
372372
# test both direct compilation and compilation via AST
373373
codeobjs = []
374374
codeobjs.append(compile(codestr, "<test>", "exec", optimize=optval))
375-
tree = ast.parse(codestr)
375+
tree = ast.parse(codestr, optimize=optval)
376376
codeobjs.append(compile(tree, "<test>", "exec", optimize=optval))
377377
for code in codeobjs:
378378
ns = {}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
The :func:`compile` built-in no longer ignores the ``optimize`` argument
2+
when called with the ``ast.PyCF_ONLY_AST`` flag. The :func:`ast.parse`
3+
function now accepts an optional argument ``optimize``, which it forwards to
4+
:func:`compile`. :func:`ast.parse` and :func:`ast.literal_eval` perform
5+
constant folding, so ASTs are more concise and :func:`ast.literal_eval`
6+
accepts const expressions.

Python/pythonrun.c

+23
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "pycore_pyerrors.h" // _PyErr_GetRaisedException, _Py_Offer_Suggestions
2222
#include "pycore_pylifecycle.h" // _Py_UnhandledKeyboardInterrupt
2323
#include "pycore_pystate.h" // _PyInterpreterState_GET()
24+
#include "pycore_symtable.h" // _PyFuture_FromAST()
2425
#include "pycore_sysmodule.h" // _PySys_Audit()
2526
#include "pycore_traceback.h" // _PyTraceBack_Print_Indented()
2627

@@ -1790,6 +1791,24 @@ run_pyc_file(FILE *fp, PyObject *globals, PyObject *locals,
17901791
return NULL;
17911792
}
17921793

1794+
static int
1795+
call_ast_optimize(mod_ty mod, PyObject *filename, PyCompilerFlags *cf,
1796+
int optimize, PyArena *arena)
1797+
{
1798+
PyFutureFeatures future;
1799+
if (!_PyFuture_FromAST(mod, filename, &future)) {
1800+
return -1;
1801+
}
1802+
int flags = future.ff_features | cf->cf_flags;
1803+
if (optimize == -1) {
1804+
optimize = _Py_GetConfig()->optimization_level;
1805+
}
1806+
if (!_PyAST_Optimize(mod, arena, optimize, flags)) {
1807+
return -1;
1808+
}
1809+
return 0;
1810+
}
1811+
17931812
PyObject *
17941813
Py_CompileStringObject(const char *str, PyObject *filename, int start,
17951814
PyCompilerFlags *flags, int optimize)
@@ -1806,6 +1825,10 @@ Py_CompileStringObject(const char *str, PyObject *filename, int start,
18061825
return NULL;
18071826
}
18081827
if (flags && (flags->cf_flags & PyCF_ONLY_AST)) {
1828+
if (call_ast_optimize(mod, filename, flags, optimize, arena) < 0) {
1829+
_PyArena_Free(arena);
1830+
return NULL;
1831+
}
18091832
PyObject *result = PyAST_mod2obj(mod);
18101833
_PyArena_Free(arena);
18111834
return result;

0 commit comments

Comments
 (0)