Skip to content

Commit

Permalink
Initial implementation of PEP 701
Browse files Browse the repository at this point in the history
  • Loading branch information
pablogsal committed Mar 20, 2023
1 parent 094cf39 commit ed0ef34
Show file tree
Hide file tree
Showing 16 changed files with 3,779 additions and 2,018 deletions.
10 changes: 10 additions & 0 deletions Doc/library/token-list.inc

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions Grammar/Tokens
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,17 @@ ATEQUAL '@='
RARROW '->'
ELLIPSIS '...'
COLONEQUAL ':='
EXCLAMATION '!'

OP
AWAIT
ASYNC
TYPE_IGNORE
TYPE_COMMENT
SOFT_KEYWORD
FSTRING_START
FSTRING_MIDDLE
FSTRING_END
ERRORTOKEN

# These aren't used by the C tokenizer but are needed for tokenize.py
Expand Down
27 changes: 25 additions & 2 deletions Grammar/python.gram
Original file line number Diff line number Diff line change
Expand Up @@ -807,7 +807,8 @@ atom[expr_ty]:
| 'True' { _PyAST_Constant(Py_True, NULL, EXTRA) }
| 'False' { _PyAST_Constant(Py_False, NULL, EXTRA) }
| 'None' { _PyAST_Constant(Py_None, NULL, EXTRA) }
| &STRING strings
# | gstring
| &(STRING|FSTRING_START) strings
| NUMBER
| &'(' (tuple | group | genexp)
| &'[' (list | listcomp)
Expand Down Expand Up @@ -877,7 +878,29 @@ lambda_param[arg_ty]: a=NAME { _PyAST_arg(a->v.Name.id, NULL, NULL, EXTRA) }
# LITERALS
# ========

strings[expr_ty] (memo): a=STRING+ { _PyPegen_concatenate_strings(p, a) }
# One piece of an f-string body: either a replacement field, or a literal
# FSTRING_MIDDLE token that is passed to _PyPegen_constant_from_token2.
gstring_middle[expr_ty]:
| gstring_replacement_field
| t=FSTRING_MIDDLE { _PyPegen_constant_from_token2(p, t) }
# There are some shenanigans with the gstring_format_spec: don't try to put it in its own rule,
# otherwise the parser will try to parse the first token with the regular tokenizer mode (due to the EXTRA).
# TODO: Ideally we need a way similar to 'memo' so the parser can set the tokenizer mode to fstring/normal.
# A replacement field inside an f-string. In order, it matches:
#   '{'                                  (bound to expr_start)
#   a yield_expr or star_expressions     (bound to a)
#   an optional "="                      (bound to debug_expr)
#   an optional "!" NAME conversion      (the action yields NULL when
#                                         _PyPegen_check_fstring_conversion returns non-zero,
#                                         otherwise the NAME)
#   an optional ':' followed by zero or more gstring_format_spec items
#                                        (wrapped in a JoinedStr when spec is non-NULL)
#   a forced '}'                         (&& makes a missing '}' an immediate syntax error)
# The collected parts are handed to _PyPegen_formatted_value.
gstring_replacement_field[expr_ty]:
| expr_start='{' a=(yield_expr | star_expressions) debug_expr="="? conversion=[
conv_token="!" conv=NAME { _PyPegen_check_fstring_conversion(p, conv_token, conv) ? NULL : conv }
] format=[
':' spec=gstring_format_spec* { spec ? _PyAST_JoinedStr((asdl_expr_seq*)spec, EXTRA) : NULL }
] &&'}' {
_PyPegen_formatted_value(p, a, debug_expr, conversion, format, EXTRA)
}
# One piece of a format spec: either a literal FSTRING_MIDDLE token (passed to
# _PyPegen_constant_from_token2) or a nested replacement field.
gstring_format_spec[expr_ty]:
| t=FSTRING_MIDDLE { _PyPegen_constant_from_token2(p, t) }
| gstring_replacement_field
# A complete f-string: an FSTRING_START token, zero or more gstring_middle pieces,
# and an FSTRING_END token. All three parts are handed to deal_with_gstring2.
gstring[expr_ty]:
| a=FSTRING_START b=gstring_middle* c=FSTRING_END { deal_with_gstring2(p, a, (asdl_expr_seq*)b, c) }

# A single STRING token, passed to _PyPegen_constant_from_token.
string[expr_ty]: s[Token*]=STRING { _PyPegen_constant_from_token(p, s) }
# One or more adjacent gstring/string items, in any mix, combined by
# _PyPegen_concatenate_strings2. The rule is memoized.
strings[expr_ty] (memo): a[asdl_expr_seq*]=(gstring|string)+ { _PyPegen_concatenate_strings2(p, a, EXTRA) }
# strings[expr_ty] (memo): a=STRING+ { _PyPegen_concatenate_strings(p, a) }

list[expr_ty]:
| '[' a=[star_named_expressions] ']' { _PyAST_List(a, Load, EXTRA) }
Expand Down
20 changes: 12 additions & 8 deletions Include/internal/pycore_token.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,14 +67,18 @@ extern "C" {
#define RARROW 51
#define ELLIPSIS 52
#define COLONEQUAL 53
#define OP 54
#define AWAIT 55
#define ASYNC 56
#define TYPE_IGNORE 57
#define TYPE_COMMENT 58
#define SOFT_KEYWORD 59
#define ERRORTOKEN 60
#define N_TOKENS 64
#define EXCLAMATION 54
#define OP 55
#define AWAIT 56
#define ASYNC 57
#define TYPE_IGNORE 58
#define TYPE_COMMENT 59
#define SOFT_KEYWORD 60
#define FSTRING_START 61
#define FSTRING_MIDDLE 62
#define FSTRING_END 63
#define ERRORTOKEN 64
#define N_TOKENS 68
#define NT_OFFSET 256

/* Special definitions for cooperation with parser */
Expand Down
96 changes: 76 additions & 20 deletions Lib/test/test_fstring.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,13 +329,13 @@ def test_ast_line_numbers_multiline_fstring(self):
self.assertEqual(t.body[1].lineno, 3)
self.assertEqual(t.body[1].value.lineno, 3)
self.assertEqual(t.body[1].value.values[0].lineno, 3)
self.assertEqual(t.body[1].value.values[1].lineno, 3)
self.assertEqual(t.body[1].value.values[2].lineno, 3)
self.assertEqual(t.body[1].value.values[1].lineno, 4)
self.assertEqual(t.body[1].value.values[2].lineno, 6)
self.assertEqual(t.body[1].col_offset, 0)
self.assertEqual(t.body[1].value.col_offset, 0)
self.assertEqual(t.body[1].value.values[0].col_offset, 0)
self.assertEqual(t.body[1].value.values[1].col_offset, 0)
self.assertEqual(t.body[1].value.values[2].col_offset, 0)
self.assertEqual(t.body[1].value.values[0].col_offset, 4)
self.assertEqual(t.body[1].value.values[1].col_offset, 2)
self.assertEqual(t.body[1].value.values[2].col_offset, 11)
# NOTE: the following lineno and col_offset information is correct for
# expressions within FormattedValues.
binop = t.body[1].value.values[1].value
Expand Down Expand Up @@ -366,13 +366,13 @@ def test_ast_line_numbers_multiline_fstring(self):
self.assertEqual(t.body[0].lineno, 2)
self.assertEqual(t.body[0].value.lineno, 2)
self.assertEqual(t.body[0].value.values[0].lineno, 2)
self.assertEqual(t.body[0].value.values[1].lineno, 2)
self.assertEqual(t.body[0].value.values[2].lineno, 2)
self.assertEqual(t.body[0].value.values[1].lineno, 3)
self.assertEqual(t.body[0].value.values[2].lineno, 3)
self.assertEqual(t.body[0].col_offset, 0)
self.assertEqual(t.body[0].value.col_offset, 4)
self.assertEqual(t.body[0].value.values[0].col_offset, 4)
self.assertEqual(t.body[0].value.values[1].col_offset, 4)
self.assertEqual(t.body[0].value.values[2].col_offset, 4)
self.assertEqual(t.body[0].value.values[0].col_offset, 8)
self.assertEqual(t.body[0].value.values[1].col_offset, 10)
self.assertEqual(t.body[0].value.values[2].col_offset, 17)
# Check {blech}
self.assertEqual(t.body[0].value.values[1].value.lineno, 3)
self.assertEqual(t.body[0].value.values[1].value.end_lineno, 3)
Expand All @@ -387,6 +387,20 @@ def test_ast_line_numbers_with_parentheses(self):
t = ast.parse(expr)
self.assertEqual(type(t), ast.Module)
self.assertEqual(len(t.body), 1)
# check the joinedstr location
joinedstr = t.body[0].value
self.assertEqual(type(joinedstr), ast.JoinedStr)
self.assertEqual(joinedstr.lineno, 3)
self.assertEqual(joinedstr.end_lineno, 3)
self.assertEqual(joinedstr.col_offset, 4)
self.assertEqual(joinedstr.end_col_offset, 17)
# check the formatted value location
fv = t.body[0].value.values[1]
self.assertEqual(type(fv), ast.FormattedValue)
self.assertEqual(fv.lineno, 3)
self.assertEqual(fv.end_lineno, 3)
self.assertEqual(fv.col_offset, 7)
self.assertEqual(fv.end_col_offset, 16)
# check the test(t) location
call = t.body[0].value.values[1].value
self.assertEqual(type(call), ast.Call)
Expand Down Expand Up @@ -415,9 +429,9 @@ def test_ast_line_numbers_with_parentheses(self):
# check the first wat
self.assertEqual(type(wat1), ast.Constant)
self.assertEqual(wat1.lineno, 4)
self.assertEqual(wat1.end_lineno, 6)
self.assertEqual(wat1.col_offset, 12)
self.assertEqual(wat1.end_col_offset, 18)
self.assertEqual(wat1.end_lineno, 5)
self.assertEqual(wat1.col_offset, 14)
self.assertEqual(wat1.end_col_offset, 26)
# check the call
call = middle.value
self.assertEqual(type(call), ast.Call)
Expand All @@ -427,9 +441,9 @@ def test_ast_line_numbers_with_parentheses(self):
self.assertEqual(call.end_col_offset, 31)
# check the second wat
self.assertEqual(type(wat2), ast.Constant)
self.assertEqual(wat2.lineno, 4)
self.assertEqual(wat2.lineno, 5)
self.assertEqual(wat2.end_lineno, 6)
self.assertEqual(wat2.col_offset, 12)
self.assertEqual(wat2.col_offset, 32)
self.assertEqual(wat2.end_col_offset, 18)

def test_docstring(self):
Expand Down Expand Up @@ -618,6 +632,7 @@ def test_format_specifier_expressions(self):
self.assertEqual(f'{-10:-{"#"}1{0}x}', ' -0xa')
self.assertEqual(f'{-10:{"-"}#{1}0{"x"}}', ' -0xa')
self.assertEqual(f'{10:#{3 != {4:5} and width}x}', ' 0xa')
self.assertEqual(f'result: {value:{width:{0}}.{precision:1}}', 'result: 12.35')

self.assertAllRaise(SyntaxError,
"""f-string: invalid conversion character 'r{"': """
Expand All @@ -632,11 +647,6 @@ def test_format_specifier_expressions(self):
"f'{4:{/5}}'",
])

self.assertAllRaise(SyntaxError, "f-string: expressions nested too deeply",
[# Can't nest format specifiers.
"f'result: {value:{width:{0}}.{precision:1}}'",
])

self.assertAllRaise(SyntaxError, 'f-string: invalid conversion character',
[# No expansion inside conversion or for
# the : or ! itself.
Expand Down Expand Up @@ -848,6 +858,50 @@ def test_lambda(self):
["f'{lambda x:x}'",
])

def test_valid_prefixes(self):
# Upper-case and mixed-case f-string prefixes (F, FR, fR) must evaluate
# their replacement fields just like a lower-case prefix would.
self.assertEqual(F'{1}', "1")
self.assertEqual(FR'{2}', "2")
self.assertEqual(fR'{3}', "3")

def test_roundtrip_raw_quotes(self):
# In a raw f-string a backslash before a quote character is kept in the
# result: each case compares the fr-string with a regular string whose
# backslashes are written out explicitly.
self.assertEqual(fr"\'", "\\'")
self.assertEqual(fr'\"', '\\"')
self.assertEqual(fr'\"\'', '\\"\\\'')
self.assertEqual(fr'\'\"', '\\\'\\"')
self.assertEqual(fr'\"\'\"', '\\"\\\'\\"')
self.assertEqual(fr'\'\"\'', '\\\'\\"\\\'')
self.assertEqual(fr'\"\'\"\'', '\\"\\\'\\"\\\'')

def test_fstring_backslash_before_double_bracket(self):
# A backslash directly before a doubled brace must not affect brace
# escaping: '{{' / '}}' still collapse to a single brace and the backslash
# is kept literally, including next to a real replacement field.
self.assertEqual(f'\{{\}}', '\\{\\}')
self.assertEqual(f'\{{', '\\{')
self.assertEqual(f'\{{{1+1}', '\\{2')
self.assertEqual(f'\}}{1+1}', '\\}2')
self.assertEqual(f'{1+1}\}}', '2\\}')
# Same expectations for the raw (fr) prefix.
self.assertEqual(fr'\{{\}}', '\\{\\}')
self.assertEqual(fr'\{{', '\\{')
self.assertEqual(fr'\{{{1+1}', '\\{2')
self.assertEqual(fr'\}}{1+1}', '\\}2')
self.assertEqual(fr'{1+1}\}}', '2\\}')

def test_fstring_backslash_prefix_raw(self):
# Non-raw f-strings process the '\\' escape like ordinary strings do.
self.assertEqual(f'\\', '\\')
self.assertEqual(f'\\\\', '\\\\')
# Raw f-strings keep every backslash, matching the equivalent raw string,
# for each spelling of the combined prefix (fr, rf, Rf, fR, FR).
self.assertEqual(fr'\\', r'\\')
self.assertEqual(fr'\\\\', r'\\\\')
self.assertEqual(rf'\\', r'\\')
self.assertEqual(rf'\\\\', r'\\\\')
self.assertEqual(Rf'\\', R'\\')
self.assertEqual(Rf'\\\\', R'\\\\')
self.assertEqual(fR'\\', R'\\')
self.assertEqual(fR'\\\\', R'\\\\')
self.assertEqual(FR'\\', R'\\')
self.assertEqual(FR'\\\\', R'\\\\')

def test_fstring_format_spec_greedy_matching(self):
# The first '}' after the format spec closes the replacement field; the
# following '}}' is an escaped literal brace, so the result ends in '}'.
self.assertEqual(f"{1:}}}", "1}")
# NOTE(review): the spec '>3{5}' evaluates to '>35' (right-align, width 35),
# so the expected literal presumably had 34 spaces before '1}' and lost them
# when whitespace was collapsed in this rendering -- confirm against the
# original file.
self.assertEqual(f"{1:>3{5}}}}", " 1}")

def test_yield(self):
# Not terribly useful, but make sure the yield turns
# a function into a generator
Expand Down Expand Up @@ -1314,6 +1368,7 @@ def __repr__(self):
self.assertEqual(f'X{x =}Y', 'Xx ='+repr(x)+'Y')
self.assertEqual(f'X{x= }Y', 'Xx= '+repr(x)+'Y')
self.assertEqual(f'X{x = }Y', 'Xx = '+repr(x)+'Y')
self.assertEqual(f"sadsd {1 + 1 = :{1 + 1:1d}f}", "sadsd 1 + 1 = 2.000000")

# These next lines contain tabs. Backslash escapes don't
# work in f-strings.
Expand All @@ -1324,6 +1379,7 @@ def __repr__(self):
#self.assertEqual(f'X{x =}Y', 'Xx\t='+repr(x)+'Y')
#self.assertEqual(f'X{x = }Y', 'Xx\t=\t'+repr(x)+'Y')


def test_walrus(self):
x = 20
# This isn't an assignment expression, it's 'x', with a format
Expand Down
27 changes: 16 additions & 11 deletions Lib/token.py

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit ed0ef34

Please sign in to comment.