Merge branch 'python-3.0-to-3.2' into python-2.4-to-2.7

2025-08-03 00:45:53 +08:00 · 2024-07-15 10:11:20 -04:00
parent 8f3adcb1f8 90d99b202a
commit 54aad6e4d6
19 changed files with 324 additions and 76 deletions
--- a/test/bytecode_3.3/03_ifelse_in_lambda.pyc
+++ b/test/bytecode_3.3/03_ifelse_in_lambda.pyc
--- a/test/bytecode_3.3/03_map.pyc
+++ b/test/bytecode_3.3/03_map.pyc
--- a/test/bytecode_3.4/03_ifelse_in_lambda.pyc
+++ b/test/bytecode_3.4/03_ifelse_in_lambda.pyc
--- a/test/bytecode_3.4/03_map.pyc
+++ b/test/bytecode_3.4/03_map.pyc
--- a/test/bytecode_3.5/02_for_else_bug.pyc
+++ b/test/bytecode_3.5/02_for_else_bug.pyc
--- a/test/bytecode_3.5/03_ifelse_in_lambda.pyc
+++ b/test/bytecode_3.5/03_ifelse_in_lambda.pyc
--- a/test/bytecode_3.6/09_long_whilestmt.pyc
+++ b/test/bytecode_3.6/09_long_whilestmt.pyc
--- a/test/simple_source/bug34/03_ifelse_in_lambda.py
+++ b/test/simple_source/bug34/03_ifelse_in_lambda.py
@@ -0,0 +1,4 @@
 # Next line is 1164
 # Decompiler regression fixture.  The body builds a lambda whose result is a
 # conditional expression ("compile(...) if x else None") that also reads the
 # enclosing local "name".  The lambda is neither bound to a name nor called.
 def foo():
    name = "bar"
    lambda x: compile(x, "<register %s's commit>" % name, "exec") if x else None
--- a/test/simple_source/bug35/02_for_else_bug.py
+++ b/test/simple_source/bug35/02_for_else_bug.py
@@ -0,0 +1,10 @@
 # Adapted 3.5 from _bootstrap_external.py
 # Decompiler regression fixture: a "for ... else" nested inside an "if",
 # where the loop body can "break" and the "else" clause returns.
 def spec_from_file_location(loader, location):
    if loader:
        for _ in __file__:
            if location:
                # Leaving via "break" skips the "else" clause below.
                break
        else:
            # Runs only when the loop finishes without hitting "break".
            return None
--- a/test/simple_source/bug36/09_long_whilestmt.py
+++ b/test/simple_source/bug36/09_long_whilestmt.py
@@ -0,0 +1,74 @@
 # From https://github.com/rocky/python-uncompyle6/issues/420
 # Related to EXTENDED_ARG in whilestmt
 # Decompiler regression fixture.  Names such as "self" and "INFORMATION" are
 # not defined anywhere in this script.
 # NOTE(review): presumably the file is only compiled to bytecode and never
 # executed successfully -- confirm before "fixing" the undefined names.
 ERRPR_CODE_DEFINE = {}  # Remove this and things works
 try:
    print()
 except Exception:
    var1 = 0
    var2 = 1
    if var1 or var2:
        times = 1
        # First long "while" loop: a try/except body with several
        # "break" and "continue" exits.
        while times != False and self.scanner.is_open():
            try:
                try:
                    print()
                except Exception:
                    print()
                out = 0
                count = 1
                if out == 1:
                    break
                elif out == 2:
                    count += 1
                    if times == 3:
                        self.func.emit({})
                        break
                    else:
                        continue
                if out == 3 or out == b"":
                    if self.times == 3:
                        break
                    count += 1
                    if count == 3:
                        count = 0
                        if out == 4:
                            self.func.emit(ERRPR_CODE_DEFINE.ReceiedError())
                        else:
                            print()
                        break
                    continue
                else:
                    count = 0
            except Exception:
                print("upper exception")
    else:
        try:
            print("jump forward")
            # Second loop: "while True" that is left only through "break".
            while True:
                out = self.func.read(count)
                if out == b"":
                    self.func.emit(ERRPR_CODE_DEFINE.ReceiedError())
                    break
                    # The "continue" right after "break" is unreachable.
                    # NOTE(review): presumably kept on purpose to reproduce
                    # the reported bytecode -- confirm before removing.
                    continue
                imagedata = out[0]
                if imagedata == b"\x05":
                    self.func.emit(INFORMATION.UnsupportedImage())
                    break
                    continue
                if imagedata == b"\x15":
                    self.func.emit(INFORMATION.NoneImage())
                    break
                    continue
                if out[1] == False:
                    start_index = imagedata.find(b"BM6")
                    self.func.emit(imagedata[start_index:], False)
                    continue
                (imagedata, all_code) = imagedata
                self.func.emit({})
                self.func.emit({})
                self.func.emit({})  # remove {} and this works
                break
        except Exception:
            pass
--- a/uncompyle6/parsers/parse34.py
+++ b/uncompyle6/parsers/parse34.py
@@ -53,6 +53,10 @@ class Python34Parser(Python33Parser):
        _ifstmts_jump ::= c_stmts_opt JUMP_ABSOLUTE JUMP_FORWARD COME_FROM
        genexpr_func ::= LOAD_ARG _come_froms FOR_ITER store comp_iter JUMP_BACK
        if_exp_lambda      ::= expr jmp_false expr return_if_lambda come_froms return_stmt_lambda LAMBDA_MARKER
        return_if_lambda   ::= RETURN_END_IF_LAMBDA come_froms
        return_if_stmt     ::= return_expr RETURN_END_IF POP_BLOCK
        """
    def customize_grammar_rules(self, tokens, customize):
--- a/uncompyle6/parsers/parse35.py
+++ b/uncompyle6/parsers/parse35.py
@@ -107,7 +107,6 @@ class Python35Parser(Python34Parser):
        # Python 3.5+ does jump optimization
        # In <.3.5 the below is a JUMP_FORWARD to a JUMP_ABSOLUTE.
        return_if_stmt    ::= return_expr RETURN_END_IF POP_BLOCK
        return_if_lambda  ::= RETURN_END_IF_LAMBDA COME_FROM
        return            ::= return_expr RETURN_END_IF
--- a/uncompyle6/parsers/parse36.py
+++ b/uncompyle6/parsers/parse36.py
@@ -52,6 +52,8 @@ class Python36Parser(Python35Parser):
        for_block       ::= l_stmts_opt come_from_loops JUMP_BACK
        come_from_loops ::= COME_FROM_LOOP*
        whilestmt       ::= SETUP_LOOP testexpr l_stmts_opt
                            JUMP_BACK come_froms POP_BLOCK
        whilestmt       ::= SETUP_LOOP testexpr l_stmts_opt
                            JUMP_BACK come_froms POP_BLOCK COME_FROM_LOOP
        whilestmt       ::= SETUP_LOOP testexpr l_stmts_opt
--- a/uncompyle6/scanners/scanner2.py
+++ b/uncompyle6/scanners/scanner2.py
@@ -491,7 +491,8 @@ class Scanner2(Scanner):
        if show_asm in ("both", "after"):
            print("\n# ---- tokenization:")
-            for t in new_tokens:
+            # FIXME: t.format() is changing tokens!
            for t in new_tokens.copy():
                print(t.format(line_prefix=""))
            print()
        return new_tokens, customize
--- a/uncompyle6/scanners/scanner26.py
+++ b/uncompyle6/scanners/scanner26.py
@@ -349,7 +349,8 @@ class Scanner26(Scanner2):
        if show_asm in ("both", "after"):
            print("\n# ---- tokenization:")
-            for t in tokens:
+            # FIXME: t.format() is changing tokens!
            for t in tokens.copy():
                print(t.format(line_prefix=""))
            print()
        return tokens, customize
--- a/uncompyle6/scanners/scanner3.py
+++ b/uncompyle6/scanners/scanner3.py
@@ -203,7 +203,7 @@ class Scanner3(Scanner):
        self, insts, next_tokens, inst, t, i, collection_type
    ):
        """
-        Try to a replace sequence of instruction that ends with a
+        Try to replace a sequence of instructions that ends with a
        BUILD_xxx with a sequence that can be parsed much faster, but
        inserting the token boundary at the beginning of the sequence.
        """
@@ -285,7 +285,7 @@ class Scanner3(Scanner):
        )
        return new_tokens
-    def bound_map_from_inst(self, insts, next_tokens, inst, t, i):
+    def bound_map_from_inst(self, insts, next_tokens, t, i):
        """
        Try to turn a sequence of instructions that ends with a BUILD_MAP into
        a sequence that can be parsed much faster, but inserting the
@@ -300,25 +300,19 @@ class Scanner3(Scanner):
        if count < 5:
            return None
-        if self.version >= (3, 5):
+        # Newer Python BUILD_MAP argument's count is a
-            # Newer Python BUILD_MAP argument's count is a
+        # key and value pair so it is multiplied by two.
-            # key and value pair so it is multiplied by two.
+        collection_start = i - (count * 2)
-            collection_start = i - (count * 2)
+        assert (count * 2) <= i
            assert (count * 2) <= i
-            for j in range(collection_start, i, 2):
+        for j in range(collection_start, i, 2):
-                if insts[j].opname not in ("LOAD_CONST",):
+            if insts[j].opname not in ("LOAD_CONST",):
-                    return None
+                return None
-                if insts[j + 1].opname not in ("LOAD_CONST",):
+            if insts[j + 1].opname not in ("LOAD_CONST",):
-                    return None
+                return None
-            collection_start = i - (2 * count)
+        collection_start = i - (2 * count)
-            collection_enum = CONST_COLLECTIONS.index("CONST_MAP")
+        collection_enum = CONST_COLLECTIONS.index("CONST_MAP")
        # else: Older Python count is sum of all key and value pairs
        # Each pair is added individually like:
        #    LOAD_CONST           ("Max-Age")
        #    LOAD_CONST           ("max-age")
        #    STORE_MAP
        # If we get here, all instructions before tokens[i] are LOAD_CONST and
        # we can add a boundary marker and change LOAD_CONST to
@@ -331,7 +325,7 @@ class Scanner3(Scanner):
                attr=collection_enum,
                pattr="CONST_MAP",
                offset="%s_0" % start_offset,
-                linestart=False,
+                linestart=insts[collection_start].starts_line,
                has_arg=True,
                has_extended_arg=False,
                opc=self.opc,
@@ -349,6 +343,7 @@ class Scanner3(Scanner):
                    has_arg=True,
                    has_extended_arg=False,
                    opc=self.opc,
                    optype="pseudo",
                )
            )
            new_tokens.append(
@@ -361,7 +356,7 @@ class Scanner3(Scanner):
                    has_arg=True,
                    has_extended_arg=False,
                    opc=self.opc,
-                    optype=insts[j + 1].optype,
+                    optype="pseudo",
                )
            )
        new_tokens.append(
@@ -374,7 +369,93 @@ class Scanner3(Scanner):
                has_arg=t.has_arg,
                has_extended_arg=False,
                opc=t.opc,
-                optype=t.optype,
+                optype="pseudo",
            )
        )
        return new_tokens
    def bound_map_from_inst_pre35(
        self, insts: list, next_tokens: list, t: Token, i: int
    ):
        """
        Try to replace a BUILD_MAP that is followed by a long run of
        constant key/value pairs with a sequence of pseudo tokens that
        can be parsed much faster, inserting a COLLECTION_START token
        boundary at the beginning of the sequence.

        The layout recognized here is BUILD_MAP followed by one
        three-instruction group per pair:

            LOAD_CONST    (value)
            LOAD_CONST    (key)
            STORE_MAP

        :param insts: instruction list; ``insts[i]`` supplies the start
                      offset and line information of the collection.
        :param next_tokens: tokens built so far; only ``next_tokens[:i]``
                      is carried over into the result.
        :param t: token of the BUILD_MAP; ``t.attr`` is the pair count.
        :param i: index into ``insts`` where the pairs begin after.
        :return: the rewritten token list, or ``None`` when the map is
                 too small, the instruction list is too short, or any
                 group is not LOAD_CONST, LOAD_CONST, STORE_MAP.
        """
        count = t.attr
        assert isinstance(count, int)

        # For small maps don't bother
        if count < 10:
            return None

        # Each pair occupies three instructions: value, key and STORE_MAP.
        collection_end = i + 1 + count * 3

        # Give up rather than index past the end of the instruction list.
        if collection_end > len(insts):
            return None

        for j in range(i + 1, collection_end, 3):
            if (
                insts[j].opname != "LOAD_CONST"
                or insts[j + 1].opname != "LOAD_CONST"
                or insts[j + 2].opname != "STORE_MAP"
            ):
                return None

        collection_enum = CONST_COLLECTIONS.index("CONST_MAP")

        new_tokens = next_tokens[:i]
        start_offset = insts[i].offset
        new_tokens.append(
            Token(
                opname="COLLECTION_START",
                attr=collection_enum,
                pattr="CONST_MAP",
                offset="%s_0" % start_offset,
                linestart=insts[i].starts_line,
                has_arg=True,
                has_extended_arg=False,
                opc=self.opc,
                optype="pseudo",
            )
        )
        for j in range(i + 1, collection_end, 3):
            # The value instruction comes first in the bytecode, but the
            # key token is emitted before the value token.
            for pseudo_opname, inst in (
                ("ADD_KEY", insts[j + 1]),
                ("ADD_VALUE", insts[j]),
            ):
                new_tokens.append(
                    Token(
                        opname=pseudo_opname,
                        attr=inst.argval,
                        pattr=inst.argrepr,
                        offset=inst.offset,
                        linestart=inst.starts_line,
                        has_arg=True,
                        has_extended_arg=False,
                        opc=self.opc,
                        optype="pseudo",
                    )
                )
        new_tokens.append(
            Token(
                opname="BUILD_DICT_OLDER",
                attr=t.attr,
                pattr=t.pattr,
                offset=t.offset,
                linestart=t.linestart,
                has_arg=t.has_arg,
                has_extended_arg=False,
                opc=t.opc,
                optype="pseudo",
            )
        )
        return new_tokens
@@ -483,8 +564,17 @@ class Scanner3(Scanner):
        last_op_was_break = False
        new_tokens = []
        skip_end_offset = None
        for i, inst in enumerate(self.insts):
            # BUILD_MAP for < 3.5 can skip *forward* in instructions and
            # replace them. So we use the below to get up to the position
            # scanned and replaced forward
            if skip_end_offset and inst.offset <= skip_end_offset:
                continue
            skip_end_offset = None
            opname = inst.opname
            argval = inst.argval
            pattr = inst.argrepr
@@ -517,17 +607,38 @@ class Scanner3(Scanner):
                if try_tokens is not None:
                    new_tokens = try_tokens
                    continue
-            elif opname in ("BUILD_MAP",) and self.version >= (3, 5):
+
-                try_tokens = self.bound_map_from_inst(
+            elif opname in ("BUILD_MAP",):
                bound_map_from_insts_fn = (
                    self.bound_map_from_inst_35
                    if self.version >= (3, 5)
                    else self.bound_map_from_inst_pre35
                )
                try_tokens = bound_map_from_insts_fn(
                    self.insts,
                    new_tokens,
                    inst,
                    t,
                    i,
                )
                if try_tokens is not None:
-                    new_tokens = try_tokens
+                    if self.version < (3, 5):
-                    continue
+                        assert try_tokens[-1] == "BUILD_DICT_OLDER"
                        prev_offset = inst.offset
                        for j in range(i, len(self.insts)):
                            if self.insts[j].opname == "STORE_NAME":
                                new_tokens = try_tokens
                                skip_end_offset = prev_offset
                                # Set a hacky sentinel to indicate skipping to the
                                # next instruction
                                opname = "EXTENDED_ARG"
                                break
                            prev_offset = self.insts[j].offset
                            pass
                        pass
                    else:
                        new_tokens = try_tokens
                        continue
                pass
            argval = inst.argval
            op = inst.opcode
@@ -786,7 +897,8 @@ class Scanner3(Scanner):
        if show_asm in ("both", "after"):
            print("\n# ---- tokenization:")
-            for t in new_tokens:
+            # FIXME: t.format() is changing tokens!
            for t in new_tokens.copy():
                print(t.format(line_prefix=""))
            print()
        return new_tokens, customize
--- a/uncompyle6/scanners/scanner37base.py
+++ b/uncompyle6/scanners/scanner37base.py
@@ -225,13 +225,13 @@ class Scanner37Base(Scanner):
        if show_asm in ("both", "before"):
            print("\n# ---- disassembly:")
-            self.insts = bytecode.disassemble_bytes(
+            bytecode.disassemble_bytes(
                co.co_code,
                varnames=co.co_varnames,
                names=co.co_names,
                constants=co.co_consts,
                cells=bytecode._cell_names,
-                linestarts=bytecode._linestarts,
+                line_starts=bytecode._linestarts,
                asm_format="extended",
                filename=co.co_filename,
                show_source=True,
@@ -478,12 +478,17 @@ class Scanner37Base(Scanner):
                    next_opname = self.insts[i + 1].opname
                    # 'Continue's include jumps to loops that are not
-                    # and the end of a block which follow with POP_BLOCK and COME_FROM_LOOP.
+                    # and the end of a block which follow with
-                    # If the JUMP_ABSOLUTE is to a FOR_ITER and it is followed by another JUMP_FORWARD
+                    # POP_BLOCK and COME_FROM_LOOP.  If the
-                    # then we'll take it as a "continue".
+                    # JUMP_ABSOLUTE is to a FOR_ITER, and it is
-                    is_continue = (
+                    # followed by another JUMP_FORWARD then we'll take
-                        self.insts[self.offset2inst_index[target]].opname == "FOR_ITER"
+                    # it as a "continue".
-                        and self.insts[i + 1].opname == "JUMP_FORWARD"
+                    next_inst = self.insts[i + 1]
                    is_continue = self.insts[
                        self.offset2inst_index[target]
                    ].opname == "FOR_ITER" and next_inst.opname in (
                        "JUMP_FORWARD",
                        "JUMP_ABSOLUTE",
                    )
                    if self.version < (3, 8) and (
@@ -498,21 +503,65 @@ class Scanner37Base(Scanner):
                    ):
                        opname = "CONTINUE"
                    else:
                        # "continue" versus "break_loop" detection is more complicated
                        # because "continue" to an outer loop is really a "break loop"
                        opname = "JUMP_BACK"
                        # FIXME: this is a hack to catch stuff like:
                        #   if x: continue
                        # the "continue" is not on a new line.
-                        # There are other situations where we don't catch
+                        #
-                        # CONTINUE as well.
+                        # Another situation is where we have
-                        if tokens[-1].kind == "JUMP_BACK" and tokens[-1].attr <= argval:
+                        #   for method in methods:
                        #      for B in method:
                        #         if c:
                        #           return
                        #        break  # A "continue" but not the innermost one
                        if tokens[-1].kind == "JUMP_LOOP" and tokens[-1].attr <= argval:
                            if tokens[-2].kind == "BREAK_LOOP":
                                del tokens[-1]
                                j -= 1
                            else:
-                                # intern is used because we are changing the *previous* token
+                                # "intern" is used because we are
-                                tokens[-1].kind = sys.intern("CONTINUE")
+                                # changing the *previous* token.  A
-                    if last_op_was_break and opname == "CONTINUE":
+                                # POP_TOP suggests a "break" rather
-                        last_op_was_break = False
+                                # than a "continue"?
-                        continue
+                                if tokens[-2] == "POP_TOP" and (
                                    is_continue and next_inst.argval != tokens[-1].attr
                                ):
                                    tokens[-1].kind = sys.intern("BREAK_LOOP")
                                else:
                                    tokens[-1].kind = sys.intern("CONTINUE")
                                    last_continue = tokens[-1]
                                    pass
                                pass
                            pass
                    #     elif (
                    #         last_continue is not None
                    #         and tokens[-1].kind == "JUMP_LOOP"
                    #         and last_continue.attr <= tokens[-1].attr
                    #         and last_continue.offset > tokens[-1].attr
                    #     ):
                    #         # Handle mis-characterized "CONTINUE"
                    #         # We have a situation like:
                    #         # loop ... for or while)
                    #         #   loop
                    #         #     if ...:   # code below starts here
                    #         #       break  # not continue
                    #         #
                    #         #   POP_JUMP_IF_FALSE_LOOP   # to outer loop
                    #         #   JUMP_LOOP                # to inner loop
                    #         #   ...
                    #         #   JUMP_LOOP                # to outer loop
                    #         tokens[-2].kind = sys.intern("BREAK_LOOP")
                    #         pass
                    # if last_op_was_break and opname == "CONTINUE":
                    #     last_op_was_break = False
                    #     continue
                    pass
                else:
                    opname = "JUMP_FORWARD"
            elif inst.offset in self.load_asserts:
                opname = "LOAD_ASSERT"
@@ -535,9 +584,10 @@ class Scanner37Base(Scanner):
            )
            pass
-        if show_asm in ("both", "after"):
+        if show_asm in ("both", "after") and self.version < (3, 8):
            print("\n# ---- tokenization:")
-            for t in tokens:
+            # FIXME: t.format() is changing tokens!
            for t in tokens.copy():
                print(t.format(line_prefix=""))
            print()
        return tokens, customize
--- a/uncompyle6/scanners/scanner38.py
+++ b/uncompyle6/scanners/scanner38.py
@@ -1,4 +1,4 @@
-#  Copyright (c) 2019-2022 by Rocky Bernstein
+#  Copyright (c) 2019-2022, 2024 by Rocky Bernstein
 #
 #  This program is free software: you can redistribute it and/or modify
 #  it under the terms of the GNU General Public License as published by
@@ -117,35 +117,26 @@ class Scanner38(Scanner37):
                    new_tokens.append(token)
                    continue
-                # We also want to avoid confusing BREAK_LOOPS with parts of the
+                j = i
-                # grammar rules for loops. (Perhaps we should change the grammar.)
+                while tokens[j - 1] in ("POP_TOP", "POP_BLOCK", "POP_EXCEPT"):
-                # Try to find an adjacent JUMP_BACK which is part of the normal loop end.
+                    j -= 1
                    if tokens[j].linestart:
                        break
                token_with_linestart = tokens[j]
-                if i + 1 < len(tokens) and tokens[i + 1] == "JUMP_BACK":
+                if token_with_linestart.linestart:
                    # Sometimes the jump back is after the "break" instruction..
                    jump_back_index = i + 1
                else:
                    # and sometimes, because of jump-to-jump optimization, it is before the
                    # jump target instruction.
                    jump_back_index = self.offset2tok_index[jump_target] - 1
                    while tokens[jump_back_index].kind.startswith("COME_FROM_"):
                        jump_back_index -= 1
                        pass
                    pass
                jump_back_token = tokens[jump_back_index]
                # Is this a forward jump not next to a JUMP_BACK ? ...
                break_loop = token.linestart and jump_back_token != "JUMP_BACK"
                # or if there is looping jump back, then that loop
                # should start before where the "break" instruction sits.
                if break_loop or (
                    jump_back_token == "JUMP_BACK"
                    and jump_back_token.attr < token.off2int()
                ):
                    token.kind = "BREAK_LOOP"
                pass
            new_tokens.append(token)
        if show_asm in ("both", "after"):
            print("\n# ---- tokenization:")
            # FIXME: t.format() is changing tokens!
            for t in new_tokens.copy():
                print(t.format(line_prefix=""))
            print()
        return new_tokens, customize
--- a/uncompyle6/scanners/tok.py
+++ b/uncompyle6/scanners/tok.py
@@ -1,4 +1,4 @@
-#  Copyright (c) 2016-2021, 2023 by Rocky Bernstein
+#  Copyright (c) 2016-2021, 2023-2024 by Rocky Bernstein
 #  Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
 #  Copyright (c) 1999 John Aycock
 #