Merge branch 'python-3.3-to-3.5' into python-2.4

rocky
2022-04-26 02:46:29 -04:00
6 changed files with 243 additions and 51 deletions


@@ -816,6 +816,22 @@ class Python3Parser(PythonParser):
                rule = "starred ::= %s %s" % ("expr " * v, opname)
                self.addRule(rule, nop_func)
            elif opname in ("BUILD_CONST_LIST", "BUILD_CONST_DICT", "BUILD_CONST_SET"):
                if opname == "BUILD_CONST_DICT":
                    rule = """
                        add_consts ::= ADD_VALUE*
                        const_list ::= COLLECTION_START add_consts %s
                        dict       ::= const_list
                        expr       ::= dict
                    """ % opname
                else:
                    rule = """
                        add_consts ::= ADD_VALUE*
                        const_list ::= COLLECTION_START add_consts %s
                        expr       ::= const_list
                    """ % opname
                self.addRule(rule, nop_func)
            elif opname_base in (
                "BUILD_LIST",
                "BUILD_SET",


@@ -40,16 +40,17 @@ if PYTHON_VERSION_TRIPLE < (2, 6):
else:
    from collections import namedtuple

from xdis import iscode, instruction_size
from xdis import iscode, instruction_size, Instruction
from xdis.bytecode import _get_const_info

from uncompyle6.scanner import Token, parse_fn_counts
from uncompyle6.scanners.tok import Token
from uncompyle6.scanner import parse_fn_counts

import xdis

# Get all the opcodes into globals
import xdis.opcodes.opcode_33 as op3

from uncompyle6.scanner import Scanner
from uncompyle6.scanner import Scanner, CONST_COLLECTIONS

import sys
@@ -207,17 +208,96 @@ class Scanner3(Scanner):
        #     self.varargs_ops = frozenset(self.opc.hasvargs)
        return

    def ingest(self, co, classname=None, code_objects={}, show_asm=None):

    def bound_collection_from_inst(
        self, insts: list, next_tokens: list, inst: Instruction, t: Token,
        i: int, collection_type: str
    ):
        count = t.attr
        assert isinstance(count, int)
        assert count <= i

        if collection_type == "CONST_DICT":
            # Constant dictionaries work via BUILD_CONST_KEY_MAP and
            # handle the values() like sets and lists.
            # However the keys() are a LOAD_CONST of the keys.
            # Adjust the count to account for this.
            count += 1

        # For small lists, don't bother.
        if count < 5:
            return None

        collection_start = i - count

        for j in range(collection_start, i):
            if insts[j].opname not in (
                "LOAD_CONST",
                "LOAD_FAST",
                "LOAD_GLOBAL",
                "LOAD_NAME",
            ):
                return None

        collection_enum = CONST_COLLECTIONS.index(collection_type)

        # If we get here, all instructions before tokens[i] are LOAD_CONSTs, and we
        # can replace them: add a boundary marker and change each LOAD_CONST to an
        # ADD_VALUE token.
        new_tokens = next_tokens[:-count]
        start_offset = insts[collection_start].offset
        new_tokens.append(
            Token(
                opname="COLLECTION_START",
                attr=collection_enum,
                pattr=collection_type,
                offset="%s_0" % start_offset,
                linestart=False,
                has_arg=True,
                has_extended_arg=False,
                opc=self.opc,
            )
        )
        for j in range(collection_start, i):
            new_tokens.append(
                Token(
                    opname="ADD_VALUE",
                    attr=insts[j].argval,
                    pattr=insts[j].argrepr,
                    offset=insts[j].offset,
                    linestart=insts[j].starts_line,
                    has_arg=True,
                    has_extended_arg=False,
                    opc=self.opc,
                )
            )
        new_tokens.append(
            Token(
                opname="BUILD_%s" % collection_type,
                attr=t.attr,
                pattr=t.pattr,
                offset=t.offset,
                linestart=t.linestart,
                has_arg=t.has_arg,
                has_extended_arg=False,
                opc=t.opc,
            )
        )
        return new_tokens
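
To make the transformation concrete, here is a small sketch (not part of this diff, and assuming a CPython 3.7-era compiler, where a literal list is built from individual LOAD_CONSTs):

import dis

# For x = [1, 2, 3, 4, 5] the compiler emits five LOAD_CONSTs followed
# by BUILD_LIST 5 -- exactly the run bound_collection_from_inst() scans:
dis.dis(compile("x = [1, 2, 3, 4, 5]", "<list>", "exec"))

# bound_collection_from_inst() rewrites that run of tokens into:
#   COLLECTION_START 'CONST_LIST', ADD_VALUE 1 ... ADD_VALUE 5,
#   BUILD_CONST_LIST 5
# which the add_consts/const_list grammar rules added in the parser match.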
    def ingest(self, co, classname=None, code_objects={}, show_asm=None):
        """
        Pick out tokens from an uncompyle6 code object, and transform them,
        Create "tokens" from the bytecode of a Python code object. Largely these
        are the opcode name, but in some cases that has been modified to make
        parsing easier.
        returning a list of uncompyle6 Token's.

        The transformations are made to assist the deparsing grammar.
        Specifically:
        Some transformations are made to assist the deparsing grammar:
          - various types of LOAD_CONST's are categorized in terms of what they load
          - COME_FROM instructions are added to assist parsing control structures
          - MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments
          - some EXTENDED_ARGS instructions are removed
          - operands with stack argument counts or flag masks are appended to the
            opcode name, e.g.:
              * BUILD_LIST, BUILD_SET
              * MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments
          - EXTENDED_ARGS instructions are removed

        Also, when we encounter certain tokens, we add them to a set which will cause
        custom grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or
        BUILD_LIST
@@ -237,9 +317,6 @@ class Scanner3(Scanner):
            for instr in bytecode.get_instructions(co):
                print(instr.disassemble())

        # list of tokens/instructions
        tokens = []

        # "customize" is in the process of going away here
        customize = {}
@@ -254,6 +331,7 @@ class Scanner3(Scanner):
        n = len(self.insts)
        for i, inst in enumerate(self.insts):
            opname = inst.opname

            # We need to detect the difference between:
            #     raise AssertionError
            # and
@@ -264,7 +342,7 @@ class Scanner3(Scanner):
            if self.version[:2] == (3, 0):
                # Like 2.6, 3.0 doesn't have POP_JUMP_IF... so we have
                # to go through more machinations
                assert_can_follow = inst.opname == "POP_TOP" and i + 1 < n
                assert_can_follow = opname == "POP_TOP" and i + 1 < n
                if assert_can_follow:
                    prev_inst = self.insts[i - 1]
                    assert_can_follow = (
@@ -273,7 +351,7 @@ class Scanner3(Scanner):
                        jump_if_inst = prev_inst
            else:
                assert_can_follow = (
                    inst.opname in ("POP_JUMP_IF_TRUE", "POP_JUMP_IF_FALSE")
                    opname in ("POP_JUMP_IF_TRUE", "POP_JUMP_IF_FALSE")
                    and i + 1 < n
                )
                jump_if_inst = inst
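
The distinction drawn above can be seen directly with dis. A minimal sketch (not part of this diff, assuming a CPython 3.7-era interpreter where assert compiles to a POP_JUMP_IF_TRUE guard):

import dis

# "assert x" guards the AssertionError raise with POP_JUMP_IF_TRUE; a bare
# "raise AssertionError" has no such jump. The preceding jump is what lets
# the scanner rename the AssertionError load to LOAD_ASSERT.
dis.dis(compile("assert x", "<assert>", "exec"))
dis.dis(compile("raise AssertionError", "<raise>", "exec"))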
@@ -297,13 +375,48 @@ class Scanner3(Scanner):
        # print("XXX2", jump_targets)
        last_op_was_break = False
        new_tokens = []
        for i, inst in enumerate(self.insts):
            opname = inst.opname
            argval = inst.argval
            pattr = inst.argrepr

            t = Token(
                opname=opname,
                attr=argval,
                pattr=pattr,
                offset=inst.offset,
                linestart=inst.starts_line,
                op=inst.opcode,
                has_arg=inst.has_arg,
                has_extended_arg=inst.has_extended_arg,
                opc=self.opc,
            )

            # Things that smash new_tokens, like BUILD_LIST, have to come first.
            if opname in (
                "BUILD_CONST_KEY_MAP",
                "BUILD_LIST",
                "BUILD_SET",
            ):
                collection_type = (
                    "DICT"
                    if opname.startswith("BUILD_CONST_KEY_MAP")
                    else opname.split("_")[1]
                )
                try_tokens = self.bound_collection_from_inst(
                    self.insts, new_tokens, inst, t, i, "CONST_%s" % collection_type
                )
                if try_tokens is not None:
                    new_tokens = try_tokens
                    continue

            argval = inst.argval
            op = inst.opcode

            if inst.opname == "EXTENDED_ARG":
            if opname == "EXTENDED_ARG":
                # FIXME: The EXTENDED_ARG is used to signal annotation
                # parameters
                if i + 1 < n and self.insts[i + 1].opcode != self.opc.MAKE_FUNCTION:
@@ -319,18 +432,18 @@ class Scanner3(Scanner):
                # "loop" tag last so the grammar rule matches that properly.
                for jump_offset in sorted(jump_targets[inst.offset], reverse=True):
                    come_from_name = "COME_FROM"

                    opname = self.opname_for_offset(jump_offset)
                    if opname == "EXTENDED_ARG":
                    come_from_opname = self.opname_for_offset(jump_offset)
                    if come_from_opname == "EXTENDED_ARG":
                        j = xdis.next_offset(op, self.opc, jump_offset)
                        opname = self.opname_for_offset(j)
                        come_from_opname = self.opname_for_offset(j)

                    if opname.startswith("SETUP_"):
                        come_from_type = opname[len("SETUP_") :]
                    if come_from_opname.startswith("SETUP_"):
                        come_from_type = come_from_opname[len("SETUP_") :]
                        come_from_name = "COME_FROM_%s" % come_from_type
                        pass
                    elif inst.offset in self.except_targets:
                        come_from_name = "COME_FROM_EXCEPT_CLAUSE"
                    tokens.append(
                    new_tokens.append(
                        Token(
                            come_from_name,
                            jump_offset,
@@ -345,7 +458,7 @@ class Scanner3(Scanner):
                    pass
            elif inst.offset in self.else_start:
                end_offset = self.else_start[inst.offset]
                tokens.append(
                new_tokens.append(
                    Token(
                        "ELSE",
                        None,
@@ -358,9 +471,6 @@ class Scanner3(Scanner):
                pass

            pattr = inst.argrepr
            opname = inst.opname

            if op in self.opc.CONST_OPS:
                const = argval
                if iscode(const):
@@ -428,7 +538,7 @@ class Scanner3(Scanner):
                    pass
                opname = "%s_%d" % (opname, pos_args)
                attr = (pos_args, name_pair_args, annotate_args)
                tokens.append(
                new_tokens.append(
                    Token(
                        opname=opname,
                        attr=attr,
@@ -514,12 +624,12 @@ class Scanner3(Scanner):
                    # the "continue" is not on a new line.
                    # There are other situations where we don't catch
                    # CONTINUE as well.
                    if tokens[-1].kind == "JUMP_BACK" and tokens[-1].attr <= argval:
                        if tokens[-2].kind == "BREAK_LOOP":
                            del tokens[-1]
                    if new_tokens[-1].kind == "JUMP_BACK" and new_tokens[-1].attr <= argval:
                        if new_tokens[-2].kind == "BREAK_LOOP":
                            del new_tokens[-1]
                        else:
                            # intern is used because we are changing the *previous* token
                            tokens[-1].kind = intern("CONTINUE")
                            new_tokens[-1].kind = intern("CONTINUE")
                if last_op_was_break and opname == "CONTINUE":
                    last_op_was_break = False
                    continue
@@ -533,25 +643,17 @@ class Scanner3(Scanner):
                    opname = "LOAD_ASSERT"

            last_op_was_break = opname == "BREAK_LOOP"
            tokens.append(
                Token(
                    opname=opname,
                    attr=argval,
                    pattr=pattr,
                    offset=inst.offset,
                    linestart=inst.starts_line,
                    op=op,
                    has_arg=inst.has_arg,
                    opc=self.opc,
                )
            )
            t.kind = opname
            t.attr = argval
            t.pattr = pattr
            new_tokens.append(t)
            pass

        if show_asm in ("both", "after"):
            for t in tokens:
            for t in new_tokens:
                print(t.format(line_prefix=""))
            print()

        return tokens, customize
        return new_tokens, customize

    def find_jump_targets(self, debug):
        """


@@ -22,6 +22,8 @@ This sets up opcodes for Python 3.7 and calls a generalized
scanner routine for Python 3.
"""

from uncompyle6.scanner import CONST_COLLECTIONS
from uncompyle6.scanners.tok import Token
from uncompyle6.scanners.scanner37base import Scanner37Base

# bytecode verification, verify(), uses JUMP_OPs from here
@@ -30,9 +32,6 @@ from xdis.opcodes import opcode_37 as opc
# bytecode verification, verify(), uses JUMP_OPS from here
JUMP_OPs = opc.JUMP_OPS

CONST_COLLECTIONS = ("CONST_LIST", "CONST_SET", "CONST_DICT")

class Scanner37(Scanner37Base):
    def __init__(self, show_asm=None, is_pypy=False):
        Scanner37Base.__init__(self, (3, 7), show_asm)
@@ -41,6 +40,81 @@ class Scanner37(Scanner37Base):
        pass

    def bound_collection_from_tokens(
        self, tokens: list, next_tokens: list, t: Token, i: int, collection_type: str
    ) -> list:
        count = t.attr
        assert isinstance(count, int)
        assert count <= i

        if collection_type == "CONST_DICT":
            # Constant dictionaries work via BUILD_CONST_KEY_MAP and
            # handle the values() like sets and lists.
            # However the keys() are a LOAD_CONST of the keys.
            # Adjust the count to account for this.
            count += 1

        # For small lists, don't bother.
        if count < 5:
            return next_tokens + [t]

        collection_start = i - count

        for j in range(collection_start, i):
            if tokens[j].kind not in (
                "LOAD_CONST",
                "LOAD_FAST",
                "LOAD_GLOBAL",
                "LOAD_NAME",
            ):
                return next_tokens + [t]

        collection_enum = CONST_COLLECTIONS.index(collection_type)

        # If we get here, all instructions before tokens[i] are LOAD_CONSTs, and we
        # can replace them: add a boundary marker and change each LOAD_CONST to an
        # ADD_VALUE token.
        new_tokens = next_tokens[:-count]
        start_offset = tokens[collection_start].offset
        new_tokens.append(
            Token(
                opname="COLLECTION_START",
                attr=collection_enum,
                pattr=collection_type,
                offset="%s_0" % start_offset,
                linestart=False,
                has_arg=True,
                has_extended_arg=False,
                opc=self.opc,
            )
        )
        for j in range(collection_start, i):
            new_tokens.append(
                Token(
                    opname="ADD_VALUE",
                    attr=tokens[j].attr,
                    pattr=tokens[j].pattr,
                    offset=tokens[j].offset,
                    linestart=tokens[j].linestart,
                    has_arg=True,
                    has_extended_arg=False,
                    opc=self.opc,
                )
            )
        new_tokens.append(
            Token(
                opname="BUILD_%s" % collection_type,
                attr=t.attr,
                pattr=t.pattr,
                offset=t.offset,
                linestart=t.linestart,
                has_arg=t.has_arg,
                has_extended_arg=False,
                opc=t.opc,
            )
        )
        return new_tokens
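
The count += 1 for CONST_DICT above reflects how BUILD_CONST_KEY_MAP lays out its operands: the values are pushed first, and the keys arrive as one extra LOAD_CONST of a tuple. A quick way to see this (a sketch, assuming a CPython 3.7-era interpreter, where this opcode exists):

import dis

# Five values are loaded, then ONE more LOAD_CONST holds the key tuple
# ('a', 'b', 'c', 'd', 'e'), then BUILD_CONST_KEY_MAP 5 -- so the run of
# constant loads is count + 1 instructions long.
dis.dis(compile("{'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5}", "<dict>", "eval"))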
    def ingest(
        self, co, classname=None, code_objects={}, show_asm=None
    ):


@@ -227,7 +227,7 @@ class NonterminalActions:
            self.indent_more(INDENT_PER_LEVEL)
            sep = ""

            if is_dict:
                keys = flat_elems[-1].pattr
                keys = flat_elems[-1].attr
                assert isinstance(keys, tuple)
                assert len(keys) == len(flat_elems) - 1
                for i, elem in enumerate(flat_elems[:-1]):
@@ -724,8 +724,8 @@ class NonterminalActions:
    def n_import_from(self, node):
        relative_path_index = 0
        if self.version >= (2, 5):
            if node[relative_path_index].pattr > 0:
                node[2].pattr = ("." * node[relative_path_index].pattr) + node[2].pattr
            if node[relative_path_index].attr > 0:
                node[2].pattr = ("." * node[relative_path_index].attr) + node[2].pattr
            if self.version > (2, 7):
                if isinstance(node[1].pattr, tuple):
                    imports = node[1].pattr
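
These pattr-to-attr switches likely matter because, per the Token construction earlier in this commit, attr carries the decoded value (inst.argval) while pattr is its display form (inst.argrepr); arithmetic and comparisons need the real value, and the same reasoning applies to the keys = flat_elems[-1].attr fix above. A tiny sketch of what the fixed n_import_from computes (names invented for illustration):

# level stands in for node[relative_path_index].attr, module for
# node[2].pattr. For "from ..pkg import mod" the relative-import level
# becomes leading dots on the module name:
level, module = 2, "pkg"
print("." * level + module)  # -> "..pkg"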