Merge pull request #233 from rocky/fstring

Revise format string handling
2025-08-03 00:45:53 +08:00 · 2019-05-13 09:43:47 -04:00
parent fe786b2b95 8b5e0f49f8
commit 251eb6da1b
9 changed files with 158 additions and 142 deletions
--- a/test/bytecode_3.6_run/01_fstring.pyc
+++ b/test/bytecode_3.6_run/01_fstring.pyc
--- a/test/bytecode_3.7_run/01_fstring.pyc
+++ b/test/bytecode_3.7_run/01_fstring.pyc
--- a/test/simple_source/bug36/01_fstring.py
+++ b/test/simple_source/bug36/01_fstring.py
@@ -39,6 +39,30 @@ source = 'foo'
 source = (f"__file__ = r'''{os.path.abspath(filename)}'''\n"
          + source + "\ndel __file__")

-# From 3.7.3 datalasses.py
+# Note how { and } are *not* escaped here
+f = 'one'
+name = 'two'
+assert(f"{f}{'{{name}}'} {f}{'{name}'}") == 'one{{name}} one{name}'
+
+# From 3.7.3 dataclasses.py
 log_rounds  = 5
 assert "05$" == f'{log_rounds:02d}$'
+
+
+def testit(a, b, l):
+    # print(l)
+    return l
+
+# The call below shows the need for BUILD_STRING to count expr arguments.
+# Also note that we use {{ }} to escape braces in contrast to the example
+# above.
+def _repr_fn(fields):
+    return testit('__repr__',
+                  ('self',),
+                  ['return xx + f"(' +
+                   ', '.join([f"{f}={{self.{f}!r}}"
+                              for f in fields]) +
+                   ')"'])
+
+fields = ['a', 'b', 'c']
+assert _repr_fn(fields) == ['return xx + f"(a={self.a!r}, b={self.b!r}, c={self.c!r})"']
--- a/uncompyle6/parser.py
+++ b/uncompyle6/parser.py
@@ -61,7 +61,6 @@ class PythonParser(GenericASTBuilder):
            'imports_cont',
            'kvlist_n',
            # Python 3.6+
-            'joined_str',
            'come_from_loops',
            ]
        self.collect = frozenset(nt_list)
@@ -83,7 +82,7 @@ class PythonParser(GenericASTBuilder):
        # FIXME: would love to do expr, sstmts, stmts and
        # so on but that would require major changes to the
        # semantic actions
-        self.singleton = frozenset(('str', 'joined_str', 'store', '_stmts', 'suite_stmts_opt',
+        self.singleton = frozenset(('str', 'store', '_stmts', 'suite_stmts_opt',
                                    'inplace_op'))
        # Instructions filled in from scanner
        self.insts = []
--- a/uncompyle6/parsers/parse36.py
+++ b/uncompyle6/parsers/parse36.py
@@ -188,21 +188,14 @@ class Python36Parser(Python35Parser):
                    self.add_unique_doc_rules(rules_str, customize)
            elif opname == 'FORMAT_VALUE':
                rules_str = """
-                    expr            ::= fstring_single
-                    fstring_single  ::= expr FORMAT_VALUE
-                    expr            ::= fstring_expr
-                    fstring_expr    ::= expr FORMAT_VALUE
-
-                    str             ::= LOAD_CONST
-                    formatted_value ::= fstring_expr
-                    formatted_value ::= str
-
+                    expr              ::= formatted_value1
+                    formatted_value1  ::= expr FORMAT_VALUE
                """
                self.add_unique_doc_rules(rules_str, customize)
            elif opname == 'FORMAT_VALUE_ATTR':
                rules_str = """
-                expr            ::= fstring_single
-                fstring_single  ::= expr expr FORMAT_VALUE_ATTR
+                expr              ::= formatted_value2
+                formatted_value2  ::= expr expr FORMAT_VALUE_ATTR
                """
                self.add_unique_doc_rules(rules_str, customize)
            elif opname == 'MAKE_FUNCTION_8':
@@ -246,17 +239,12 @@ class Python36Parser(Python35Parser):
                """
                self.addRule(rules_str, nop_func)

-            elif opname == 'BUILD_STRING':
+            elif opname.startswith('BUILD_STRING'):
                v = token.attr
-                joined_str_n = "formatted_value_%s" % v
                rules_str = """
-                    expr                 ::= fstring_multi
-                    fstring_multi        ::= joined_str BUILD_STRING
-                    fstr                 ::= expr
-                    joined_str           ::= fstr+
-                    fstring_multi        ::= %s BUILD_STRING
-                    %s                   ::= %sBUILD_STRING
-                """ % (joined_str_n, joined_str_n, "formatted_value " * v)
+                    expr                 ::= joined_str
+                    joined_str           ::= %sBUILD_STRING_%d
+                """ % ("expr " * v, v)
                self.add_unique_doc_rules(rules_str, customize)
                if 'FORMAT_VALUE_ATTR' in self.seen_ops:
                    rules_str = """
--- a/uncompyle6/scanners/scanner36.py
+++ b/uncompyle6/scanners/scanner36.py
@@ -33,6 +33,8 @@ class Scanner36(Scanner3):
                 t.op == self.opc.CALL_FUNCTION_EX and t.attr & 1):
                t.kind = 'CALL_FUNCTION_EX_KW'
                pass
+            elif t.op == self.opc.BUILD_STRING:
+                t.kind = 'BUILD_STRING_%s' % t.attr
            elif t.op == self.opc.CALL_FUNCTION_KW:
                t.kind = 'CALL_FUNCTION_KW_%s' % t.attr
            elif t.op == self.opc.FORMAT_VALUE:
--- a/uncompyle6/semantics/consts.py
+++ b/uncompyle6/semantics/consts.py
@@ -27,71 +27,27 @@ else:
    maxint = sys.maxint


-# Operator precidence
-# See https://docs.python.org/2/reference/expressions.html
-# or https://docs.python.org/3/reference/expressions.html
-# for a list. The top to down order here is reversed
-# from the list in the above lin.
+# Operator precedence. See
+# https://docs.python.org/2/reference/expressions.html#operator-precedence
+# or
+# https://docs.python.org/3/reference/expressions.html#operator-precedence
+# for a list. We keep the same top-to-bottom order here as in the above links,
+# so we start with low precedence (high values) and go down in value.

-# Things at the top of this list below with low-value precidence will
-# tend to have parenthesis around them. Things at the bottom
+# Things at the bottom of this list below with high precedence (low value) will
+# tend to have parenthesis around them. Things at the top
 # of the list will tend not to have parenthesis around them.

-# Note: The values in this table tend to be even value. Inside
+# Note: The values in this table are even numbers. Inside
 # various templates we use odd values. Avoiding equal-precedent comparisons
 # avoids ambiguity what to do when the precedence is equal.


 PRECEDENCE = {
-    'list':                   0,
-    'dict':                   0,
-    'unary_convert':          0,
-    'dict_comp':              0,
-    'set_comp':               0,
-    'set_comp_expr':          0,
-    'list_comp':              0,
-    'generator_exp':          0,
+    'yield':                 102,
+    'yield_from':            102,

-    'attribute':              2,
-    'subscript':              2,
-    'subscript2':             2,
-    'store_subscript':        2,
-    'delete_subscript':       2,
-    'slice0':                 2,
-    'slice1':                 2,
-    'slice2':                 2,
-    'slice3':                 2,
-    'buildslice2':            2,
-    'buildslice3':            2,
-    'call':                   2,
-
-    'BINARY_POWER':           4,
-
-    'unary_expr':             6,
-
-    'BINARY_MULTIPLY':        8,
-    'BINARY_DIVIDE':          8,
-    'BINARY_TRUE_DIVIDE':     8,
-    'BINARY_FLOOR_DIVIDE':    8,
-    'BINARY_MODULO':          8,
-
-    'BINARY_ADD':             10,
-    'BINARY_SUBTRACT':        10,
-
-    'BINARY_LSHIFT':          12, # Shifts <<
-    'BINARY_RSHIFT':          12, # Shifts >>
-
-    'BINARY_AND':             14, # Bitwise AND
-    'BINARY_XOR':             16, # Bitwise XOR
-    'BINARY_OR':              18, # Bitwise OR
-
-    'compare':                20, # in, not in, is, is not, <, <=, >, >=, !=, ==
-    'unary_not':              22, # Boolean NOT
-    'and':                    24, # Boolean AND
-    'ret_and':                24,
-
-    'or':                     26, # Boolean OR
-    'ret_or':                 26,
+    '_mklambda':              30,

    'conditional':            28, # Conditional expression
    'conditional_lamdba':     28, # Lambda expression
@@ -100,10 +56,56 @@ PRECEDENCE = {
    'if_expr_true':           28,
    'ret_cond':               28,

-    '_mklambda':              30,
+    'or':                     26, # Boolean OR
+    'ret_or':                 26,

-    'yield':                 102,
-    'yield_from':            102
+    'and':                    24, # Boolean AND
+    'compare':                20, # in, not in, is, is not, <, <=, >, >=, !=, ==
+    'ret_and':                24,
+    'unary_not':              22, # Boolean NOT
+
+    'BINARY_AND':             14, # Bitwise AND
+    'BINARY_OR':              18, # Bitwise OR
+    'BINARY_XOR':             16, # Bitwise XOR
+
+    'BINARY_LSHIFT':          12, # Shifts <<
+    'BINARY_RSHIFT':          12, # Shifts >>
+
+    'BINARY_ADD':             10, # +
+    'BINARY_SUBTRACT':        10, # -
+
+    'BINARY_DIVIDE':          8,  # /
+    'BINARY_FLOOR_DIVIDE':    8,  # //
+    'BINARY_MATRIX_MULTIPLY': 8,  # @
+    'BINARY_MODULO':          8,  # Remainder, %
+    'BINARY_MULTIPLY':        8,  # *
+    'BINARY_TRUE_DIVIDE':     8,  # Division /
+
+    'unary_expr':             6,  # +x, -x, ~x
+
+    'BINARY_POWER':           4,  # Exponentiation, **
+
+    'attribute':              2,  # x.attribute
+    'buildslice2':            2,  # x[index]
+    'buildslice3':            2,  # x[index:index]
+    'call':                   2,  # x(arguments...)
+    'delete_subscript':       2,
+    'slice0':                 2,
+    'slice1':                 2,
+    'slice2':                 2,
+    'slice3':                 2,
+    'store_subscript':        2,
+    'subscript':              2,
+    'subscript2':             2,
+
+    'dict':                   0,  # {expressions...}
+    'dict_comp':              0,
+    'generator_exp':          0,  # (expressions...)
+    'list':                   0,  # [expressions...]
+    'list_comp':              0,
+    'set_comp':               0,
+    'set_comp_expr':          0,
+    'unary_convert':          0,
 }

 LINE_LENGTH = 80
--- a/uncompyle6/semantics/customize36.py
+++ b/uncompyle6/semantics/customize36.py
@@ -41,17 +41,11 @@ def customize_for_version36(self, version):
    PRECEDENCE['call_ex_kw3'] = 1
    PRECEDENCE['call_ex_kw4'] = 1
    PRECEDENCE['unmap_dict']  = 0
+    PRECEDENCE['formatted_value1'] = 100

    TABLE_DIRECT.update({
        'tryfinally36':     ( '%|try:\n%+%c%-%|finally:\n%+%c%-\n\n',
                              (1, 'returns'), 3 ),
-        'fstring_expr':     ( "{%c%{conversion}}",
-                              (0, 'expr') ),
-        # FIXME: the below assumes the format strings
-        # don't have ''' in them. Fix this properly
-        'fstring_single':   ( "f'''{%c%{conversion}}'''", 0),
-        'formatted_value_attr': ( "f'''{%c%{conversion}}%{string}'''",
-                                  (0, 'expr')),
        'func_args36':      ( "%c(**", 0),
        'try_except36':     ( '%|try:\n%+%c%-%c\n\n', 1, -2 ),
        'except_return':    ( '%|except:\n%+%c%-', 3 ),
@@ -129,7 +123,7 @@ def customize_for_version36(self, version):

        expr = node[1]
        assert expr == 'expr'
-        
+
        value = self.format_pos_args(expr)
        if value == '':
            fmt = "%c(%p)"
@@ -157,7 +151,7 @@ def customize_for_version36(self, version):
        self.template_engine(
            (fmt,
            (0, 'expr'), (2, 'build_map_unpack_with_call', 100)), node)
-        
+
        self.prune()
    self.n_call_ex_kw2 = call_ex_kw2

@@ -166,18 +160,18 @@ def customize_for_version36(self, version):
        BUILD_MAP_UNPACK_WITH_CALL"""
        self.preorder(node[0])
        self.write('(')
-        
+
        value = self.format_pos_args(node[1][0])
        if value == '':
            pass
        else:
            self.write(value)
            self.write(', ')
-       
+
        self.write('*')
        self.preorder(node[1][1])
        self.write(', ')
-        
+
        kwargs = node[2]
        if kwargs == 'expr':
            kwargs = kwargs[0]
@@ -425,7 +419,6 @@ def customize_for_version36(self, version):
            node.string = escape_format(fmt_node[0].attr)
        else:
            node.string = fmt_node
-
        self.default(node)
    self.n_formatted_value_attr = n_formatted_value_attr

@@ -436,60 +429,72 @@ def customize_for_version36(self, version):
        else:
            data = fmt_node.attr
        node.conversion = FSTRING_CONVERSION_MAP.get(data, '')
+        return node.conversion

-    def n_fstring_expr(node):
-        f_conversion(node)
-        self.default(node)
-    self.n_fstring_expr = n_fstring_expr
-
-    def n_fstr(node):
-        if node[0] == 'expr' and node[0][0] == 'fstring_expr':
-            f_conversion(node[0][0])
-            self.default(node[0][0])
-        else:
-            value = strip_quotes(self.traverse(node[0], indent=''))
-            pass
-        self.write(value)
+    def n_formatted_value1(node):
+        expr = node[0]
+        assert expr == 'expr'
+        value = self.traverse(expr, indent='')
+        conversion = f_conversion(node)
+        f_str = "f%s" % escape_string("{%s%s}" % (value, conversion))
+        self.write(f_str)
        self.prune()
-    self.n_fstr = n_fstr

-    def n_fstring_single(node):
-        attr4 = len(node) == 3 and node[-1] == 'FORMAT_VALUE_ATTR' and node[-1].attr == 4
-        if attr4 and hasattr(node[0][0], 'attr'):
-            assert node[0] == 'expr'
+    self.n_formatted_value1 = n_formatted_value1
+
+    def n_formatted_value2(node):
+        p = self.prec
+        self.prec = 100
+
+        expr = node[0]
+        assert expr == 'expr'
+        value = self.traverse(expr, indent='')
+        format_value_attr = node[-1]
+        assert format_value_attr == 'FORMAT_VALUE_ATTR'
+        attr = format_value_attr.attr
+        if attr == 4:
            assert node[1] == 'expr'
-            self.write("{%s:%s}" % (node[0][0].attr, node[1][0].attr))
-            self.prune()
+            fmt = strip_quotes(self.traverse(node[1], indent=''))
+            conversion = ":%s" % fmt
        else:
-            f_conversion(node)
-            self.default(node)
-    self.n_fstring_single = n_fstring_single
+            conversion = FSTRING_CONVERSION_MAP.get(attr, '')
+
+        f_str = "f%s" % escape_string("{%s%s}" % (value, conversion))
+        self.write(f_str)
+
+        self.prec = p
+        self.prune()
+    self.n_formatted_value2 = n_formatted_value2

    def n_joined_str(node):
+        p = self.prec
+        self.prec = 100
+
        result = ''
-        for fstr_node in node:
-            assert fstr_node == 'fstr'
-            assert fstr_node[0] == 'expr'
-            subnode = fstr_node[0][0]
-            if subnode.kind == 'fstring_expr':
-                # Don't include outer f'...'
-                f_conversion(subnode)
-                data = strip_quotes(self.traverse(subnode, indent=''))
-                result += data
-            elif subnode == 'LOAD_CONST':
-                result += strip_quotes(escape_string(subnode.attr))
-            elif subnode == 'fstring_single':
-                f_conversion(subnode)
-                data = self.traverse(subnode, indent='')
-                if data[0:1] == 'f':
-                    data = strip_quotes(data[1:])
-                result += data
+        for expr in node[:-1]:
+            assert expr == 'expr'
+            value = self.traverse(expr, indent='')
+            if expr[0].kind.startswith('formatted_value'):
+                # remove leading 'f'
+                assert value.startswith('f')
+                value = value[1:]
                pass
            else:
-                result += strip_quotes(self.traverse(subnode, indent=''))
-                pass
+                # {{ and }} in Python source-code format strings mean
+                # { and } respectively, but only when *not* part of a
+                # formatted value. In the LOAD_CONST bytecode,
+                # however, the escaping of the braces has been
+                # removed, so we need to put the brace escaping back
+                # when reconstructing the source.
+                assert expr[0] == 'LOAD_CONST'
+                value = value.replace("{", "{{").replace("}", "}}")
+
+            # Remove leading quotes
+            result += strip_quotes(value)
            pass
        self.write('f%s' % escape_string(result))
+
+        self.prec = p
        self.prune()
    self.n_joined_str = n_joined_str

--- a/uncompyle6/semantics/pysource.py
+++ b/uncompyle6/semantics/pysource.py
@@ -1837,11 +1837,7 @@ class SourceWalker(GenericASTTraversal, object):
            typ = m.group('type') or '{'
            node = startnode
            if m.group('child'):
-                try:
-                    node = node[int(m.group('child'))]
-                except:
-                    from trepan.api import debug; debug()
-                    pass
+                node = node[int(m.group('child'))]

            if   typ == '%':	self.write('%')
            elif typ == '+':