Merge pull request #233 from rocky/fstring

Revise format string handling
This commit is contained in:
R. Bernstein
2019-05-13 09:43:47 -04:00
committed by GitHub
9 changed files with 158 additions and 142 deletions

Binary file not shown.

Binary file not shown.

View File

@@ -39,6 +39,30 @@ source = 'foo'
source = (f"__file__ = r'''{os.path.abspath(filename)}'''\n"
+ source + "\ndel __file__")
# From 3.7.3 dataclasses.py
# Note how { and } are *not* escaped here
f = 'one'
name = 'two'
assert(f"{f}{'{{name}}'} {f}{'{name}'}") == 'one{{name}} one{name}'
# From 3.7.3 dataclasses.py
log_rounds = 5
assert "05$" == f'{log_rounds:02d}$'
def testit(a, b, l):
# print(l)
return l
# The call below shows the need for BUILD_STRING to count expr arguments.
# Also note that we use {{ }} to escape braces in contrast to the example
# above.
def _repr_fn(fields):
return testit('__repr__',
('self',),
['return xx + f"(' +
', '.join([f"{f}={{self.{f}!r}}"
for f in fields]) +
')"'])
fields = ['a', 'b', 'c']
assert _repr_fn(fields) == ['return xx + f"(a={self.a!r}, b={self.b!r}, c={self.c!r})"']

View File

@@ -61,7 +61,6 @@ class PythonParser(GenericASTBuilder):
'imports_cont',
'kvlist_n',
# Python 3.6+
'joined_str',
'come_from_loops',
]
self.collect = frozenset(nt_list)
@@ -83,7 +82,7 @@ class PythonParser(GenericASTBuilder):
# FIXME: would love to do expr, sstmts, stmts and
# so on but that would require major changes to the
# semantic actions
self.singleton = frozenset(('str', 'joined_str', 'store', '_stmts', 'suite_stmts_opt',
self.singleton = frozenset(('str', 'store', '_stmts', 'suite_stmts_opt',
'inplace_op'))
# Instructions filled in from scanner
self.insts = []

View File

@@ -188,21 +188,14 @@ class Python36Parser(Python35Parser):
self.add_unique_doc_rules(rules_str, customize)
elif opname == 'FORMAT_VALUE':
rules_str = """
expr ::= fstring_single
fstring_single ::= expr FORMAT_VALUE
expr ::= fstring_expr
fstring_expr ::= expr FORMAT_VALUE
str ::= LOAD_CONST
formatted_value ::= fstring_expr
formatted_value ::= str
expr ::= formatted_value1
formatted_value1 ::= expr FORMAT_VALUE
"""
self.add_unique_doc_rules(rules_str, customize)
elif opname == 'FORMAT_VALUE_ATTR':
rules_str = """
expr ::= fstring_single
fstring_single ::= expr expr FORMAT_VALUE_ATTR
expr ::= formatted_value2
formatted_value2 ::= expr expr FORMAT_VALUE_ATTR
"""
self.add_unique_doc_rules(rules_str, customize)
elif opname == 'MAKE_FUNCTION_8':
@@ -246,17 +239,12 @@ class Python36Parser(Python35Parser):
"""
self.addRule(rules_str, nop_func)
elif opname == 'BUILD_STRING':
elif opname.startswith('BUILD_STRING'):
v = token.attr
joined_str_n = "formatted_value_%s" % v
rules_str = """
expr ::= fstring_multi
fstring_multi ::= joined_str BUILD_STRING
fstr ::= expr
joined_str ::= fstr+
fstring_multi ::= %s BUILD_STRING
%s ::= %sBUILD_STRING
""" % (joined_str_n, joined_str_n, "formatted_value " * v)
expr ::= joined_str
joined_str ::= %sBUILD_STRING_%d
""" % ("expr " * v, v)
self.add_unique_doc_rules(rules_str, customize)
if 'FORMAT_VALUE_ATTR' in self.seen_ops:
rules_str = """

View File

@@ -33,6 +33,8 @@ class Scanner36(Scanner3):
t.op == self.opc.CALL_FUNCTION_EX and t.attr & 1):
t.kind = 'CALL_FUNCTION_EX_KW'
pass
elif t.op == self.opc.BUILD_STRING:
t.kind = 'BUILD_STRING_%s' % t.attr
elif t.op == self.opc.CALL_FUNCTION_KW:
t.kind = 'CALL_FUNCTION_KW_%s' % t.attr
elif t.op == self.opc.FORMAT_VALUE:

View File

@@ -27,71 +27,27 @@ else:
maxint = sys.maxint
# Operator precedence
# See https://docs.python.org/2/reference/expressions.html
# or https://docs.python.org/3/reference/expressions.html
# for a list. The top-to-bottom order here is reversed
# from the list in the above links.
# Operator precedence. See
# https://docs.python.org/2/reference/expressions.html#operator-precedence
# or
# https://docs.python.org/3/reference/expressions.html#operator-precedence
# for a list. We keep the same top-to-bottom order here as in the above links,
# so we start with low precedence (high values) and go down in value.
# Things at the top of this list below with low-value precedence will
# tend to have parentheses around them. Things at the bottom
# Things at the bottom of this list below with high precedence (low value) will
# tend to have parentheses around them. Things at the top
# of the list will tend not to have parentheses around them.
# Note: The values in this table tend to be even value. Inside
# Note: The values in this table are even numbers. Inside
# various templates we use odd values. Avoiding equal-precedence comparisons
# avoids ambiguity about what to do when the precedence is equal.
PRECEDENCE = {
'list': 0,
'dict': 0,
'unary_convert': 0,
'dict_comp': 0,
'set_comp': 0,
'set_comp_expr': 0,
'list_comp': 0,
'generator_exp': 0,
'yield': 102,
'yield_from': 102,
'attribute': 2,
'subscript': 2,
'subscript2': 2,
'store_subscript': 2,
'delete_subscript': 2,
'slice0': 2,
'slice1': 2,
'slice2': 2,
'slice3': 2,
'buildslice2': 2,
'buildslice3': 2,
'call': 2,
'BINARY_POWER': 4,
'unary_expr': 6,
'BINARY_MULTIPLY': 8,
'BINARY_DIVIDE': 8,
'BINARY_TRUE_DIVIDE': 8,
'BINARY_FLOOR_DIVIDE': 8,
'BINARY_MODULO': 8,
'BINARY_ADD': 10,
'BINARY_SUBTRACT': 10,
'BINARY_LSHIFT': 12, # Shifts <<
'BINARY_RSHIFT': 12, # Shifts >>
'BINARY_AND': 14, # Bitwise AND
'BINARY_XOR': 16, # Bitwise XOR
'BINARY_OR': 18, # Bitwise OR
'compare': 20, # in, not in, is, is not, <, <=, >, >=, !=, ==
'unary_not': 22, # Boolean NOT
'and': 24, # Boolean AND
'ret_and': 24,
'or': 26, # Boolean OR
'ret_or': 26,
'_mklambda': 30,
'conditional': 28, # Conditional expression
'conditional_lamdba': 28, # Lambda expression
@@ -100,10 +56,56 @@ PRECEDENCE = {
'if_expr_true': 28,
'ret_cond': 28,
'_mklambda': 30,
'or': 26, # Boolean OR
'ret_or': 26,
'yield': 102,
'yield_from': 102
'and': 24, # Boolean AND
'compare': 20, # in, not in, is, is not, <, <=, >, >=, !=, ==
'ret_and': 24,
'unary_not': 22, # Boolean NOT
'BINARY_AND': 14, # Bitwise AND
'BINARY_OR': 18, # Bitwise OR
'BINARY_XOR': 16, # Bitwise XOR
'BINARY_LSHIFT': 12, # Shifts <<
'BINARY_RSHIFT': 12, # Shifts >>
'BINARY_ADD': 10, # +
'BINARY_SUBTRACT': 10, # -
'BINARY_DIVIDE': 8, # /
'BINARY_FLOOR_DIVIDE': 8, # //
'BINARY_MATRIX_MULTIPLY': 8, # @
'BINARY_MODULO': 8, # Remainder, %
'BINARY_MULTIPLY': 8, # *
'BINARY_TRUE_DIVIDE': 8, # Division /
'unary_expr': 6, # +x, -x, ~x
'BINARY_POWER': 4, # Exponentiation, **
'attribute': 2, # x.attribute
'buildslice2': 2, # x[index]
'buildslice3': 2, # x[index:index]
'call': 2, # x(arguments...)
'delete_subscript': 2,
'slice0': 2,
'slice1': 2,
'slice2': 2,
'slice3': 2,
'store_subscript': 2,
'subscript': 2,
'subscript2': 2,
'dict': 0, # {expressions...}
'dict_comp': 0,
'generator_exp': 0, # (expressions...)
'list': 0, # [expressions...]
'list_comp': 0,
'set_comp': 0,
'set_comp_expr': 0,
'unary_convert': 0,
}
LINE_LENGTH = 80

View File

@@ -41,17 +41,11 @@ def customize_for_version36(self, version):
PRECEDENCE['call_ex_kw3'] = 1
PRECEDENCE['call_ex_kw4'] = 1
PRECEDENCE['unmap_dict'] = 0
PRECEDENCE['formatted_value1'] = 100
TABLE_DIRECT.update({
'tryfinally36': ( '%|try:\n%+%c%-%|finally:\n%+%c%-\n\n',
(1, 'returns'), 3 ),
'fstring_expr': ( "{%c%{conversion}}",
(0, 'expr') ),
# FIXME: the below assumes the format strings
# don't have ''' in them. Fix this properly
'fstring_single': ( "f'''{%c%{conversion}}'''", 0),
'formatted_value_attr': ( "f'''{%c%{conversion}}%{string}'''",
(0, 'expr')),
'func_args36': ( "%c(**", 0),
'try_except36': ( '%|try:\n%+%c%-%c\n\n', 1, -2 ),
'except_return': ( '%|except:\n%+%c%-', 3 ),
@@ -129,7 +123,7 @@ def customize_for_version36(self, version):
expr = node[1]
assert expr == 'expr'
value = self.format_pos_args(expr)
if value == '':
fmt = "%c(%p)"
@@ -157,7 +151,7 @@ def customize_for_version36(self, version):
self.template_engine(
(fmt,
(0, 'expr'), (2, 'build_map_unpack_with_call', 100)), node)
self.prune()
self.n_call_ex_kw2 = call_ex_kw2
@@ -166,18 +160,18 @@ def customize_for_version36(self, version):
BUILD_MAP_UNPACK_WITH_CALL"""
self.preorder(node[0])
self.write('(')
value = self.format_pos_args(node[1][0])
if value == '':
pass
else:
self.write(value)
self.write(', ')
self.write('*')
self.preorder(node[1][1])
self.write(', ')
kwargs = node[2]
if kwargs == 'expr':
kwargs = kwargs[0]
@@ -425,7 +419,6 @@ def customize_for_version36(self, version):
node.string = escape_format(fmt_node[0].attr)
else:
node.string = fmt_node
self.default(node)
self.n_formatted_value_attr = n_formatted_value_attr
@@ -436,60 +429,72 @@ def customize_for_version36(self, version):
else:
data = fmt_node.attr
node.conversion = FSTRING_CONVERSION_MAP.get(data, '')
return node.conversion
def n_fstring_expr(node):
f_conversion(node)
self.default(node)
self.n_fstring_expr = n_fstring_expr
def n_fstr(node):
if node[0] == 'expr' and node[0][0] == 'fstring_expr':
f_conversion(node[0][0])
self.default(node[0][0])
else:
value = strip_quotes(self.traverse(node[0], indent=''))
pass
self.write(value)
def n_formatted_value1(node):
expr = node[0]
assert expr == 'expr'
value = self.traverse(expr, indent='')
conversion = f_conversion(node)
f_str = "f%s" % escape_string("{%s%s}" % (value, conversion))
self.write(f_str)
self.prune()
self.n_fstr = n_fstr
def n_fstring_single(node):
attr4 = len(node) == 3 and node[-1] == 'FORMAT_VALUE_ATTR' and node[-1].attr == 4
if attr4 and hasattr(node[0][0], 'attr'):
assert node[0] == 'expr'
self.n_formatted_value1 = n_formatted_value1
def n_formatted_value2(node):
p = self.prec
self.prec = 100
expr = node[0]
assert expr == 'expr'
value = self.traverse(expr, indent='')
format_value_attr = node[-1]
assert format_value_attr == 'FORMAT_VALUE_ATTR'
attr = format_value_attr.attr
if attr == 4:
assert node[1] == 'expr'
self.write("{%s:%s}" % (node[0][0].attr, node[1][0].attr))
self.prune()
fmt = strip_quotes(self.traverse(node[1], indent=''))
conversion = ":%s" % fmt
else:
f_conversion(node)
self.default(node)
self.n_fstring_single = n_fstring_single
conversion = FSTRING_CONVERSION_MAP.get(attr, '')
f_str = "f%s" % escape_string("{%s%s}" % (value, conversion))
self.write(f_str)
self.prec = p
self.prune()
self.n_formatted_value2 = n_formatted_value2
def n_joined_str(node):
p = self.prec
self.prec = 100
result = ''
for fstr_node in node:
assert fstr_node == 'fstr'
assert fstr_node[0] == 'expr'
subnode = fstr_node[0][0]
if subnode.kind == 'fstring_expr':
# Don't include outer f'...'
f_conversion(subnode)
data = strip_quotes(self.traverse(subnode, indent=''))
result += data
elif subnode == 'LOAD_CONST':
result += strip_quotes(escape_string(subnode.attr))
elif subnode == 'fstring_single':
f_conversion(subnode)
data = self.traverse(subnode, indent='')
if data[0:1] == 'f':
data = strip_quotes(data[1:])
result += data
for expr in node[:-1]:
assert expr == 'expr'
value = self.traverse(expr, indent='')
if expr[0].kind.startswith('formatted_value'):
# remove leading 'f'
assert value.startswith('f')
value = value[1:]
pass
else:
result += strip_quotes(self.traverse(subnode, indent=''))
pass
# {{ and }} in Python source-code format strings mean
# { and } respectively. But only when *not* part of a
# formatted value. However in the LOAD_CONST
# bytecode, the escaping of the braces has been
# removed. So we need to put back the brace escaping when
# reconstructing the source.
assert expr[0] == 'LOAD_CONST'
value = value.replace("{", "{{").replace("}", "}}")
# Remove leading quotes
result += strip_quotes(value)
pass
self.write('f%s' % escape_string(result))
self.prec = p
self.prune()
self.n_joined_str = n_joined_str

View File

@@ -1837,11 +1837,7 @@ class SourceWalker(GenericASTTraversal, object):
typ = m.group('type') or '{'
node = startnode
if m.group('child'):
try:
node = node[int(m.group('child'))]
except:
from trepan.api import debug; debug()
pass
node = node[int(m.group('child'))]
if typ == '%': self.write('%')
elif typ == '+':