Merge branch 'python-3.3-to-3.5' into python-2.4

Author: rocky
Date:   2022-04-26 02:46:29 -04:00

6 changed files with 243 additions and 51 deletions

Binary file not shown.

View File

@@ -816,6 +816,22 @@ class Python3Parser(PythonParser):
                 rule = "starred ::= %s %s" % ("expr " * v, opname)
                 self.addRule(rule, nop_func)
+            elif opname in ("BUILD_CONST_LIST", "BUILD_CONST_DICT", "BUILD_CONST_SET"):
+                if opname == "BUILD_CONST_DICT":
+                    rule = """
+                        add_consts ::= ADD_VALUE*
+                        const_list ::= COLLECTION_START add_consts %s
+                        dict ::= const_list
+                        expr ::= dict
+                    """ % opname
+                else:
+                    rule = """
+                        add_consts ::= ADD_VALUE*
+                        const_list ::= COLLECTION_START add_consts %s
+                        expr ::= const_list
+                    """ % opname
+                self.addRule(rule, nop_func)
             elif opname_base in (
                 "BUILD_LIST",
                 "BUILD_SET",

View File

@@ -40,16 +40,17 @@ if PYTHON_VERSION_TRIPLE < (2, 6):
 else:
     from collections import namedtuple

-from xdis import iscode, instruction_size
+from xdis import iscode, instruction_size, Instruction
 from xdis.bytecode import _get_const_info

-from uncompyle6.scanner import Token, parse_fn_counts
+from uncompyle6.scanners.tok import Token
+from uncompyle6.scanner import parse_fn_counts

 import xdis

 # Get all the opcodes into globals
 import xdis.opcodes.opcode_33 as op3

-from uncompyle6.scanner import Scanner
+from uncompyle6.scanner import Scanner, CONST_COLLECTIONS

 import sys

@@ -207,17 +208,96 @@ class Scanner3(Scanner):
         #     self.varargs_ops = frozenset(self.opc.hasvargs)
         return

-    def ingest(self, co, classname=None, code_objects={}, show_asm=None):
+    def bound_collection_from_inst(
+        self, insts: list, next_tokens: list, inst: Instruction, t: Token, i: int, collection_type: str
+    ):
+        count = t.attr
+        assert isinstance(count, int)
+        assert count <= i
+
+        if collection_type == "CONST_DICT":
+            # Constant dictionaries work via BUILD_CONST_KEY_MAP and
+            # handle the values() like sets and lists.
+            # However, the keys() are a LOAD_CONST of the keys.
+            # Adjust the count to account for this.
+            count += 1
+
+        # For small lists don't bother
+        if count < 5:
+            return None
+
+        collection_start = i - count
+
+        for j in range(collection_start, i):
+            if insts[j].opname not in (
+                "LOAD_CONST",
+                "LOAD_FAST",
+                "LOAD_GLOBAL",
+                "LOAD_NAME",
+            ):
+                return None
+
+        collection_enum = CONST_COLLECTIONS.index(collection_type)
+
+        # If we get here, all instructions before tokens[i] are LOAD_CONSTs, so we
+        # can add a boundary marker and change each LOAD_CONST to an ADD_VALUE.
+        new_tokens = next_tokens[:-count]
+        start_offset = insts[collection_start].offset
+        new_tokens.append(
+            Token(
+                opname="COLLECTION_START",
+                attr=collection_enum,
+                pattr=collection_type,
+                offset="%s_0" % start_offset,
+                linestart=False,
+                has_arg=True,
+                has_extended_arg=False,
+                opc=self.opc,
+            )
+        )
+        for j in range(collection_start, i):
+            new_tokens.append(
+                Token(
+                    opname="ADD_VALUE",
+                    attr=insts[j].argval,
+                    pattr=insts[j].argrepr,
+                    offset=insts[j].offset,
+                    linestart=insts[j].starts_line,
+                    has_arg=True,
+                    has_extended_arg=False,
+                    opc=self.opc,
+                )
+            )
+        new_tokens.append(
+            Token(
+                opname="BUILD_%s" % collection_type,
+                attr=t.attr,
+                pattr=t.pattr,
+                offset=t.offset,
+                linestart=t.linestart,
+                has_arg=t.has_arg,
+                has_extended_arg=False,
+                opc=t.opc,
+            )
+        )
+        return new_tokens
+
+    def ingest(self, co, classname=None, code_objects={}, show_asm=None
+    ):
         """
-        Pick out tokens from an uncompyle6 code object, and transform them,
+        Create "tokens" from the bytecode of a Python code object. Largely these
+        are the opcode names, but in some cases the names have been modified to
+        make parsing easier,
         returning a list of uncompyle6 Token's.

-        The transformations are made to assist the deparsing grammar.
-        Specificially:
+        Some transformations are made to assist the deparsing grammar:

         - various types of LOAD_CONST's are categorized in terms of what they load
         - COME_FROM instructions are added to assist parsing control structures
-        - MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments
-        - some EXTENDED_ARGS instructions are removed
+        - operands with stack argument counts or flag masks are appended to the
+          opcode name, e.g.:
+          * BUILD_LIST, BUILD_SET
+          * MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments
+        - EXTENDED_ARGS instructions are removed

         Also, when we encounter certain tokens, we add them to a set which will cause custom
         grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST
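
Note: as an illustration of the rewrite bound_collection_from_inst performs (token names are from the hunk above; values are made up), for x = [1, 2, 3, 4, 5]:

    # Before: one token per instruction.
    before = ["LOAD_CONST"] * 5 + ["BUILD_LIST"]
    # After: a boundary marker, one ADD_VALUE per element, and a renamed build.
    after = ["COLLECTION_START"] + ["ADD_VALUE"] * 5 + ["BUILD_CONST_LIST"]
    assert len(after) == len(before) + 1

Collections shorter than five elements, or containing anything other than LOAD_CONST/LOAD_FAST/LOAD_GLOBAL/LOAD_NAME, are left alone: the method returns None and ingest() falls through to its normal handling.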
@@ -237,9 +317,6 @@ class Scanner3(Scanner):
             for instr in bytecode.get_instructions(co):
                 print(instr.disassemble())

-        # list of tokens/instructions
-        tokens = []
-
         # "customize" is in the process of going away here
         customize = {}
@@ -254,6 +331,7 @@ class Scanner3(Scanner):
         n = len(self.insts)
         for i, inst in enumerate(self.insts):
+            opname = inst.opname

             # We need to detect the difference between:
             #     raise AssertionError
             # and
@@ -264,7 +342,7 @@ class Scanner3(Scanner):
             if self.version[:2] == (3, 0):
                 # Like 2.6, 3.0 doesn't have POP_JUMP_IF... so we have
                 # to go through more machinations
-                assert_can_follow = inst.opname == "POP_TOP" and i + 1 < n
+                assert_can_follow = opname == "POP_TOP" and i + 1 < n
                 if assert_can_follow:
                     prev_inst = self.insts[i - 1]
                     assert_can_follow = (
@@ -273,7 +351,7 @@ class Scanner3(Scanner):
                     jump_if_inst = prev_inst
             else:
                 assert_can_follow = (
-                    inst.opname in ("POP_JUMP_IF_TRUE", "POP_JUMP_IF_FALSE")
+                    opname in ("POP_JUMP_IF_TRUE", "POP_JUMP_IF_FALSE")
                     and i + 1 < n
                 )
                 jump_if_inst = inst
@@ -297,13 +375,48 @@ class Scanner3(Scanner):
         # print("XXX2", jump_targets)
         last_op_was_break = False
+        new_tokens = []

         for i, inst in enumerate(self.insts):
+            opname = inst.opname
+            argval = inst.argval
+            pattr = inst.argrepr
+
+            t = Token(
+                opname=opname,
+                attr=argval,
+                pattr=pattr,
+                offset=inst.offset,
+                linestart=inst.starts_line,
+                op=inst.opcode,
+                has_arg=inst.has_arg,
+                has_extended_arg=inst.has_extended_arg,
+                opc=self.opc,
+            )
+
+            # Things that smash new_tokens, like BUILD_LIST, have to come first.
+            if opname in (
+                "BUILD_CONST_KEY_MAP",
+                "BUILD_LIST",
+                "BUILD_SET",
+            ):
+                collection_type = (
+                    "DICT"
+                    if opname.startswith("BUILD_CONST_KEY_MAP")
+                    else opname.split("_")[1]
+                )
+                try_tokens = self.bound_collection_from_inst(
+                    self.insts, new_tokens, inst, t, i, "CONST_%s" % collection_type
+                )
+                if try_tokens is not None:
+                    new_tokens = try_tokens
+                    continue
+
             argval = inst.argval
             op = inst.opcode

-            if inst.opname == "EXTENDED_ARG":
+            if opname == "EXTENDED_ARG":
                 # FIXME: The EXTENDED_ARG is used to signal annotation
                 # parameters
                 if i + 1 < n and self.insts[i + 1].opcode != self.opc.MAKE_FUNCTION:
@@ -319,18 +432,18 @@ class Scanner3(Scanner):
                 # "loop" tag last so the grammar rule matches that properly.
                 for jump_offset in sorted(jump_targets[inst.offset], reverse=True):
                     come_from_name = "COME_FROM"
-                    opname = self.opname_for_offset(jump_offset)
-                    if opname == "EXTENDED_ARG":
+                    come_from_opname = self.opname_for_offset(jump_offset)
+                    if come_from_opname == "EXTENDED_ARG":
                         j = xdis.next_offset(op, self.opc, jump_offset)
-                        opname = self.opname_for_offset(j)
+                        come_from_opname = self.opname_for_offset(j)

-                    if opname.startswith("SETUP_"):
-                        come_from_type = opname[len("SETUP_") :]
+                    if come_from_opname.startswith("SETUP_"):
+                        come_from_type = come_from_opname[len("SETUP_") :]
                         come_from_name = "COME_FROM_%s" % come_from_type
                         pass
                     elif inst.offset in self.except_targets:
                         come_from_name = "COME_FROM_EXCEPT_CLAUSE"
-                    tokens.append(
+                    new_tokens.append(
                         Token(
                             come_from_name,
                             jump_offset,
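
Note: the come_from_opname rename fixes latent shadowing. The loop body now sets opname = inst.opname once per instruction, and reusing opname for the COME_FROM lookup would clobber that value for the rest of the iteration. A hypothetical reduction of the hazard:

    opname = "BUILD_LIST"        # current instruction's name, set at loop top
    for jump_offset in (10, 20):
        opname = "SETUP_LOOP"    # inner lookup reuses the name...
    print(opname)                # ...and later tests see "SETUP_LOOP", not "BUILD_LIST"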
@@ -345,7 +458,7 @@ class Scanner3(Scanner):
                     pass
             elif inst.offset in self.else_start:
                 end_offset = self.else_start[inst.offset]
-                tokens.append(
+                new_tokens.append(
                     Token(
                         "ELSE",
                         None,
@@ -358,9 +471,6 @@ class Scanner3(Scanner):
                 pass

-            pattr = inst.argrepr
-            opname = inst.opname
-
             if op in self.opc.CONST_OPS:
                 const = argval
                 if iscode(const):
@@ -428,7 +538,7 @@ class Scanner3(Scanner):
                     pass
                 opname = "%s_%d" % (opname, pos_args)
                 attr = (pos_args, name_pair_args, annotate_args)
-                tokens.append(
+                new_tokens.append(
                     Token(
                         opname=opname,
                         attr=attr,
@@ -514,12 +624,12 @@ class Scanner3(Scanner):
                     # the "continue" is not on a new line.
                     # There are other situations where we don't catch
                     # CONTINUE as well.
-                    if tokens[-1].kind == "JUMP_BACK" and tokens[-1].attr <= argval:
-                        if tokens[-2].kind == "BREAK_LOOP":
-                            del tokens[-1]
+                    if new_tokens[-1].kind == "JUMP_BACK" and new_tokens[-1].attr <= argval:
+                        if new_tokens[-2].kind == "BREAK_LOOP":
+                            del new_tokens[-1]
                         else:
                             # intern is used because we are changing the *previous* token
-                            tokens[-1].kind = intern("CONTINUE")
+                            new_tokens[-1].kind = intern("CONTINUE")
                 if last_op_was_break and opname == "CONTINUE":
                     last_op_was_break = False
                     continue
@@ -533,25 +643,17 @@ class Scanner3(Scanner):
                     opname = "LOAD_ASSERT"

             last_op_was_break = opname == "BREAK_LOOP"
-            tokens.append(
-                Token(
-                    opname=opname,
-                    attr=argval,
-                    pattr=pattr,
-                    offset=inst.offset,
-                    linestart=inst.starts_line,
-                    op=op,
-                    has_arg=inst.has_arg,
-                    opc=self.opc,
-                )
-            )
+            t.kind = opname
+            t.attr = argval
+            t.pattr = pattr
+            new_tokens.append(t)
             pass

         if show_asm in ("both", "after"):
-            for t in tokens:
+            for t in new_tokens:
                 print(t.format(line_prefix=""))
             print()
-        return tokens, customize
+        return new_tokens, customize

     def find_jump_targets(self, debug):
         """

View File

@@ -22,6 +22,8 @@ This sets up opcodes Python's 3.7 and calls a generalized
 scanner routine for Python 3.
 """

+from uncompyle6.scanner import CONST_COLLECTIONS
+from uncompyle6.scanners.tok import Token
+
 from uncompyle6.scanners.scanner37base import Scanner37Base

 # bytecode verification, verify(), uses JUMP_OPs from here
@@ -30,9 +32,6 @@ from xdis.opcodes import opcode_37 as opc
 # bytecode verification, verify(), uses JUMP_OPS from here
 JUMP_OPs = opc.JUMP_OPS

-CONST_COLLECTIONS = ("CONST_LIST", "CONST_SET", "CONST_DICT")
-
 class Scanner37(Scanner37Base):
     def __init__(self, show_asm=None, is_pypy=False):
         Scanner37Base.__init__(self, (3, 7), show_asm)
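
Note: the tuple itself is unchanged; it only moves (per the new import above, presumably into uncompyle6/scanner.py) so that Scanner3 and Scanner37 share one definition. COLLECTION_START's attr is an index into it:

    CONST_COLLECTIONS = ("CONST_LIST", "CONST_SET", "CONST_DICT")
    assert CONST_COLLECTIONS.index("CONST_DICT") == 2  # stored as the token's attr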
@@ -41,6 +40,81 @@ class Scanner37(Scanner37Base):
         pass

+    def bound_collection_from_tokens(
+        self, tokens: list, next_tokens: list, t: Token, i: int, collection_type: str
+    ) -> list:
+        count = t.attr
+        assert isinstance(count, int)
+        assert count <= i
+
+        if collection_type == "CONST_DICT":
+            # Constant dictionaries work via BUILD_CONST_KEY_MAP and
+            # handle the values() like sets and lists.
+            # However, the keys() are a LOAD_CONST of the keys.
+            # Adjust the count to account for this.
+            count += 1
+
+        # For small lists don't bother
+        if count < 5:
+            return next_tokens + [t]
+
+        collection_start = i - count
+
+        for j in range(collection_start, i):
+            if tokens[j].kind not in (
+                "LOAD_CONST",
+                "LOAD_FAST",
+                "LOAD_GLOBAL",
+                "LOAD_NAME",
+            ):
+                return next_tokens + [t]
+
+        collection_enum = CONST_COLLECTIONS.index(collection_type)
+
+        # If we get here, all instructions before tokens[i] are LOAD_CONSTs, so we
+        # can add a boundary marker and change each LOAD_CONST to an ADD_VALUE.
+        new_tokens = next_tokens[:-count]
+        start_offset = tokens[collection_start].offset
+        new_tokens.append(
+            Token(
+                opname="COLLECTION_START",
+                attr=collection_enum,
+                pattr=collection_type,
+                offset="%s_0" % start_offset,
+                linestart=False,
+                has_arg=True,
+                has_extended_arg=False,
+                opc=self.opc,
+            )
+        )
+        for j in range(collection_start, i):
+            new_tokens.append(
+                Token(
+                    opname="ADD_VALUE",
+                    attr=tokens[j].attr,
+                    pattr=tokens[j].pattr,
+                    offset=tokens[j].offset,
+                    linestart=tokens[j].linestart,
+                    has_arg=True,
+                    has_extended_arg=False,
+                    opc=self.opc,
+                )
+            )
+        new_tokens.append(
+            Token(
+                opname="BUILD_%s" % collection_type,
+                attr=t.attr,
+                pattr=t.pattr,
+                offset=t.offset,
+                linestart=t.linestart,
+                has_arg=t.has_arg,
+                has_extended_arg=False,
+                opc=t.opc,
+            )
+        )
+        return new_tokens
+
     def ingest(
         self, co, classname=None, code_objects={}, show_asm=None
     ):
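
Note: the two new methods signal "no rewrite" differently. Scanner3's instruction-based variant returns None and its caller checks before continuing (see the scanner3.py hunk above), while this token-based variant returns next_tokens + [t], so its caller can presumably assign the result unconditionally. A toy contrast (function names hypothetical):

    def rewrite_or_none(elems):            # Scanner3-style contract
        return None if len(elems) < 5 else ["rewritten"]

    def rewrite_or_passthrough(elems, t):  # Scanner37-style contract
        return elems + [t] if len(elems) < 5 else ["rewritten"]

    result = rewrite_or_none([1, 2])                        # caller must test for None
    tokens = rewrite_or_passthrough([1, 2], "BUILD_LIST")   # assign unconditionally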

View File

@@ -227,7 +227,7 @@ class NonterminalActions:
             self.indent_more(INDENT_PER_LEVEL)
             sep = ""
             if is_dict:
-                keys = flat_elems[-1].pattr
+                keys = flat_elems[-1].attr
                 assert isinstance(keys, tuple)
                 assert len(keys) == len(flat_elems) - 1
                 for i, elem in enumerate(flat_elems[:-1]):
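
Note: the keys fix follows from how BUILD_CONST_KEY_MAP is tokenized: the keys arrive as a single tuple constant whose decoded value is in the token's attr, while pattr is only the display string. Illustration with made-up values:

    keys_attr = ("a", "b")        # decoded constant: what .attr holds
    keys_pattr = repr(keys_attr)  # "('a', 'b')": what .pattr holds
    assert isinstance(keys_attr, tuple)
    assert not isinstance(keys_pattr, tuple)  # the old code's assert would fail here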
@@ -724,8 +724,8 @@ class NonterminalActions:
     def n_import_from(self, node):
         relative_path_index = 0
         if self.version >= (2, 5):
-            if node[relative_path_index].pattr > 0:
-                node[2].pattr = ("." * node[relative_path_index].pattr) + node[2].pattr
+            if node[relative_path_index].attr > 0:
+                node[2].pattr = ("." * node[relative_path_index].attr) + node[2].pattr
             if self.version > (2, 7):
                 if isinstance(node[1].pattr, tuple):
                     imports = node[1].pattr
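
Note: the same attr-versus-pattr distinction drives the n_import_from fix: the relative-import level is an int in attr, while pattr is its string form, and both the > 0 comparison and the "." * level multiplication need the int. A minimal stand-in (Tok is hypothetical, not uncompyle6's Token):

    class Tok:
        def __init__(self, level):
            self.attr = level         # decoded operand, e.g. 2 for `from .. import mod`
            self.pattr = repr(level)  # display string "2"

    t = Tok(2)
    assert t.attr > 0         # int comparison works
    print("." * t.attr)       # ".."
    # "." * t.pattr  -> TypeError: can't multiply sequence by non-int
    # t.pattr > 0    -> TypeError on Python 3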