correct fn name on older 3.x cross decompile...
Also black, lint, and isort some
@@ -39,10 +39,10 @@ from typing import Optional, Tuple
 
 from xdis import iscode, instruction_size, Instruction
 from xdis.bytecode import _get_const_info
-from xdis.codetype import UnicodeForPython3
 
 from uncompyle6.scanners.tok import Token
 from uncompyle6.scanner import parse_fn_counts_30_35
+from uncompyle6.util import get_code_name
 import xdis
 
 # Get all the opcodes into globals
@@ -209,11 +209,18 @@ class Scanner3(Scanner):
         return

     def bound_collection_from_inst(
-        self, insts: list, next_tokens: list, inst: Instruction, t: Token, i: int, collection_type: str
+        self,
+        insts: list,
+        next_tokens: list,
+        inst: Instruction,
+        t: Token,
+        i: int,
+        collection_type: str,
     ) -> Optional[list]:
         """
-        Try to a replace sequence of instruction that ends with a BUILD_xxx with a sequence that can
-        be parsed much faster, but inserting the token boundary at the beginning of the sequence.
+        Try to replace a sequence of instructions that ends with a
+        BUILD_xxx with a sequence that can be parsed much faster,
+        inserting a token boundary at the beginning of the sequence.
         """
         count = t.attr
         assert isinstance(count, int)
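What this pair of methods does can be sketched in a few lines. The sketch below is a hypothetical illustration, not uncompyle6's code: ADD_VALUE is the pseudo-token visible later in this diff, while collapse_collection, COLLECTION_START, and BUILD_CONST_LIST are names assumed for the example.

    # Hypothetical sketch: a run of LOAD_CONSTs feeding a BUILD_LIST is
    # rewritten as a boundary marker plus ADD_VALUE pseudo-tokens, which a
    # grammar can then match with one linear rule instead of backtracking.
    def collapse_collection(tokens):
        _opname, count = tokens[-1]
        start = len(tokens) - 1 - count
        # Bail out unless every collected value is a plain constant.
        if start < 0 or any(op != "LOAD_CONST" for op, _ in tokens[start:-1]):
            return None
        return (
            tokens[:start]
            + [("COLLECTION_START", count)]
            + [("ADD_VALUE", arg) for _, arg in tokens[start:-1]]
            + [("BUILD_CONST_LIST", count)]
        )

    print(collapse_collection(
        [("LOAD_CONST", 1), ("LOAD_CONST", 2), ("LOAD_CONST", 3), ("BUILD_LIST", 3)]
    ))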
@@ -291,10 +298,12 @@ class Scanner3(Scanner):
         return new_tokens

     def bound_map_from_inst(
-        self, insts: list, next_tokens: list, inst: Instruction, t: Token, i: int) -> Optional[list]:
+        self, insts: list, next_tokens: list, inst: Instruction, t: Token, i: int
+    ) -> Optional[list]:
         """
-        Try to a sequence of instruction that ends with a BUILD_MAP into a sequence that can
-        be parsed much faster, but inserting the token boundary at the beginning of the sequence.
+        Try to turn a sequence of instructions that ends with a BUILD_MAP
+        into a sequence that can be parsed much faster, inserting a
+        token boundary at the beginning of the sequence.
         """
         count = t.attr
         assert isinstance(count, int)
@@ -309,21 +318,18 @@ class Scanner3(Scanner):
         assert (count * 2) <= i

         for j in range(collection_start, i, 2):
-            if insts[j].opname not in (
-                "LOAD_CONST",
-            ):
+            if insts[j].opname not in ("LOAD_CONST",):
                 return None
-            if insts[j+1].opname not in (
-                "LOAD_CONST",
-            ):
+            if insts[j + 1].opname not in ("LOAD_CONST",):
                 return None

         collection_start = i - (2 * count)
         collection_enum = CONST_COLLECTIONS.index("CONST_MAP")

-        # If we get here, all instructions before tokens[i] are LOAD_CONST and we can replace
-        # add a boundary marker and change LOAD_CONST to something else
-        new_tokens = next_tokens[:-(2*count)]
+        # If we get here, all instructions before tokens[i] are LOAD_CONST:
+        # we can add a boundary marker and change LOAD_CONST to
+        # something else.
+        new_tokens = next_tokens[: -(2 * count)]
         start_offset = insts[collection_start].offset
         new_tokens.append(
             Token(
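The map variant has to validate key/value pairs, which is why the loop above steps by two and inspects both insts[j] and insts[j + 1]. A hypothetical standalone version of that check (all_const_pairs is an invented name):

    # Keys and values alternate on the stack before BUILD_MAP, so both
    # slots of every pair must be LOAD_CONST for the rewrite to be safe.
    def all_const_pairs(opnames, count):
        start = len(opnames) - 2 * count
        return start >= 0 and all(
            opnames[j] == "LOAD_CONST" and opnames[j + 1] == "LOAD_CONST"
            for j in range(start, len(opnames), 2)
        )

    print(all_const_pairs(["LOAD_CONST"] * 4, 2))               # True
    print(all_const_pairs(["LOAD_CONST", "LOAD_NAME"] * 2, 2))  # False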
@@ -353,10 +359,10 @@ class Scanner3(Scanner):
             new_tokens.append(
                 Token(
                     opname="ADD_VALUE",
-                    attr=insts[j+1].argval,
-                    pattr=insts[j+1].argrepr,
-                    offset=insts[j+1].offset,
-                    linestart=insts[j+1].starts_line,
+                    attr=insts[j + 1].argval,
+                    pattr=insts[j + 1].argrepr,
+                    offset=insts[j + 1].offset,
+                    linestart=insts[j + 1].starts_line,
                     has_arg=True,
                     has_extended_arg=False,
                     opc=self.opc,
@@ -376,8 +382,9 @@ class Scanner3(Scanner):
             )
         return new_tokens

-    def ingest(self, co, classname=None, code_objects={}, show_asm=None
-    ) -> Tuple[list, dict]:
+    def ingest(
+        self, co, classname=None, code_objects={}, show_asm=None
+    ) -> Tuple[list, dict]:
         """
         Create "tokens" from the bytecode of a Python code object. Largely these
         are the opcode name, but in some cases that has been modified to make parsing
@@ -387,14 +394,17 @@ class Scanner3(Scanner):
         Some transformations are made to assist the deparsing grammar:
         - various types of LOAD_CONST's are categorized in terms of what they load
         - COME_FROM instructions are added to assist parsing control structures
-        - operands with stack argument counts or flag masks are appended to the opcode name, e.g.:
+        - operands with stack argument counts or flag masks are appended to the
+          opcode name, e.g.:
           * BUILD_LIST, BUILD_SET
-        * MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments
+          * MAKE_FUNCTION and FUNCTION_CALLS append the number of positional
+            arguments
         - EXTENDED_ARGS instructions are removed

-        Also, when we encounter certain tokens, we add them to a set which will cause custom
-        grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST
-        cause specific rules for the specific number of arguments they take.
+        Also, when we encounter certain tokens, we add them to a set
+        which will cause custom grammar rules. Specifically, variable
+        arg tokens like MAKE_FUNCTION or BUILD_LIST cause specific rules
+        for the specific number of arguments they take.
         """

         if not show_asm:
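The "appended to the opcode name" point in the docstring is how variable-arity opcodes get per-arity grammar rules. A minimal sketch of the naming scheme, with an invented helper (specialize is not uncompyle6 API):

    # Fold the operand count into the token name so the parser can emit a
    # rule per arity, e.g. list ::= expr expr expr BUILD_LIST_3.
    def specialize(opname: str, oparg: int) -> str:
        if opname in ("BUILD_LIST", "BUILD_SET", "BUILD_TUPLE", "CALL_FUNCTION"):
            return f"{opname}_{oparg}"
        return opname

    print(specialize("BUILD_LIST", 3))  # BUILD_LIST_3
    print(specialize("LOAD_CONST", 0))  # LOAD_CONST, unchanged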
@@ -420,7 +430,6 @@ class Scanner3(Scanner):
 
         n = len(self.insts)
         for i, inst in enumerate(self.insts):
-
             opname = inst.opname
             # We need to detect the difference between:
             #   raise AssertionError
@@ -437,12 +446,12 @@ class Scanner3(Scanner):
                 prev_inst = self.insts[i - 1]
                 assert_can_follow = (
                     prev_inst.opname in ("JUMP_IF_TRUE", "JUMP_IF_FALSE")
-                    and i + 1 < n )
+                    and i + 1 < n
+                )
                 jump_if_inst = prev_inst
             else:
                 assert_can_follow = (
-                    opname in ("POP_JUMP_IF_TRUE", "POP_JUMP_IF_FALSE")
-                    and i + 1 < n
+                    opname in ("POP_JUMP_IF_TRUE", "POP_JUMP_IF_FALSE") and i + 1 < n
                 )
                 jump_if_inst = inst
             if assert_can_follow:
@@ -452,7 +461,9 @@ class Scanner3(Scanner):
                     and next_inst.argval == "AssertionError"
                     and jump_if_inst.argval
                 ):
-                    raise_idx = self.offset2inst_index[self.prev_op[jump_if_inst.argval]]
+                    raise_idx = self.offset2inst_index[
+                        self.prev_op[jump_if_inst.argval]
+                    ]
                     raise_inst = self.insts[raise_idx]
                     if raise_inst.opname.startswith("RAISE_VARARGS"):
                         self.load_asserts.add(next_inst.offset)
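For context, here is the shape being matched: on CPython 3.6-3.8, `assert x` at module level disassembles roughly as below (offsets differ on other versions). The jump's target is the instruction after RAISE_VARARGS, so self.prev_op[jump_if_inst.argval] lands exactly on the raise.

    # >>> import dis
    # >>> dis.dis(compile("assert x", "<s>", "exec"))
    #   0 LOAD_NAME            0 (x)
    #   2 POP_JUMP_IF_TRUE     8
    #   4 LOAD_GLOBAL          1 (AssertionError)
    #   6 RAISE_VARARGS        1
    #   8 LOAD_CONST           0 (None)
    #  10 RETURN_VALUE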
@@ -468,22 +479,21 @@ class Scanner3(Scanner):
         new_tokens = []

         for i, inst in enumerate(self.insts):
-
             opname = inst.opname
             argval = inst.argval
             pattr = inst.argrepr

             t = Token(
-                    opname=opname,
-                    attr=argval,
-                    pattr=pattr,
-                    offset=inst.offset,
-                    linestart=inst.starts_line,
-                    op=inst.opcode,
-                    has_arg=inst.has_arg,
-                    has_extended_arg=inst.has_extended_arg,
-                    opc=self.opc,
-                )
+                opname=opname,
+                attr=argval,
+                pattr=pattr,
+                offset=inst.offset,
+                linestart=inst.starts_line,
+                op=inst.opcode,
+                has_arg=inst.has_arg,
+                has_extended_arg=inst.has_extended_arg,
+                opc=self.opc,
+            )

             # things that smash new_tokens like BUILD_LIST have to come first.
             if opname in (
@@ -502,11 +512,13 @@ class Scanner3(Scanner):
                 if try_tokens is not None:
                     new_tokens = try_tokens
                     continue
-            elif opname in (
-                "BUILD_MAP",
-            ):
+            elif opname in ("BUILD_MAP",):
                 try_tokens = self.bound_map_from_inst(
-                    self.insts, new_tokens, inst, t, i,
+                    self.insts,
+                    new_tokens,
+                    inst,
+                    t,
+                    i,
                 )
                 if try_tokens is not None:
                     new_tokens = try_tokens
@@ -573,9 +585,7 @@ class Scanner3(Scanner):
             if op in self.opc.CONST_OPS:
                 const = argval
                 if iscode(const):
-                    co_name = const.co_name
-                    if isinstance(const.co_name, UnicodeForPython3):
-                        co_name = const.co_name.value.decode("utf-8")
+                    co_name = get_code_name(const)
                     if co_name == "<lambda>":
                         assert opname == "LOAD_CONST"
                         opname = "LOAD_LAMBDA"
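This hunk is the fix named in the commit title. The deleted lines show the problem: co_name may not be a plain str when decompiling bytecode across interpreter versions. A hedged sketch of what a helper like uncompyle6.util.get_code_name has to handle (the real implementation may differ):

    def get_code_name_sketch(code) -> str:
        name = code.co_name
        # Under cross-version decompilation, xdis can represent co_name as a
        # UnicodeForPython3 wrapper around raw bytes; comparisons such as
        # co_name == "<lambda>" silently fail unless it is decoded first.
        if hasattr(name, "value"):
            return name.value.decode("utf-8")
        return name

Centralizing this in get_code_name is what corrects the wrong function names seen when decompiling older 3.x bytecode from a different interpreter.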
@@ -629,7 +639,7 @@ class Scanner3(Scanner):
                     else:
                         pos_args, name_pair_args, annotate_args = parse_fn_counts_30_35(
                             inst.argval
-                            )
+                        )

                         pattr = f"{pos_args} positional, {name_pair_args} keyword only, {annotate_args} annotated"
 
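On CPython 3.0-3.5 the MAKE_FUNCTION operand packs all three counts into one integer. Roughly as below; this is a simplified sketch, and parse_fn_counts_30_35 in uncompyle6.scanner is the authoritative version (among other quirks, CPython's annotation count, when nonzero, includes an extra slot for the tuple of parameter names):

    def parse_fn_counts_sketch(argval: int):
        pos_args = argval & 0xFF               # positional default values
        name_pair_args = (argval >> 8) & 0xFF  # keyword-only default pairs
        annotate_args = argval >> 16           # annotation entries
        return pos_args, name_pair_args, annotate_args

    # 2 positional defaults, 1 keyword-only default, 0 annotations:
    print(parse_fn_counts_sketch(2 | (1 << 8)))  # (2, 1, 0)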
@@ -715,11 +725,13 @@ class Scanner3(Scanner):
                     and self.insts[i + 1].opname == "JUMP_FORWARD"
                 )

-                if (self.version[:2] == (3, 0) and self.insts[i + 1].opname == "JUMP_FORWARD"
-                    and not is_continue):
+                if (
+                    self.version[:2] == (3, 0)
+                    and self.insts[i + 1].opname == "JUMP_FORWARD"
+                    and not is_continue
+                ):
                     target_prev = self.offset2inst_index[self.prev_op[target]]
-                    is_continue = (
-                        self.insts[target_prev].opname == "SETUP_LOOP")
+                    is_continue = self.insts[target_prev].opname == "SETUP_LOOP"

                 if is_continue or (
                     inst.offset in self.stmts
@@ -736,7 +748,10 @@ class Scanner3(Scanner):
                     # the "continue" is not on a new line.
                     # There are other situations where we don't catch
                     # CONTINUE as well.
-                    if new_tokens[-1].kind == "JUMP_BACK" and new_tokens[-1].attr <= argval:
+                    if (
+                        new_tokens[-1].kind == "JUMP_BACK"
+                        and new_tokens[-1].attr <= argval
+                    ):
                         if new_tokens[-2].kind == "BREAK_LOOP":
                             del new_tokens[-1]
                         else:
@@ -809,7 +824,10 @@ class Scanner3(Scanner):
             if inst.has_arg:
                 label = self.fixed_jumps.get(offset)
                 oparg = inst.arg
-                if self.version >= (3, 6) and self.code[offset] == self.opc.EXTENDED_ARG:
+                if (
+                    self.version >= (3, 6)
+                    and self.code[offset] == self.opc.EXTENDED_ARG
+                ):
                     j = xdis.next_offset(op, self.opc, offset)
                     next_offset = xdis.next_offset(op, self.opc, j)
                 else:
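The double next_offset call is about 3.6+ wordcode: a jump argument wider than one byte is carried by an EXTENDED_ARG unit glued in front of the real instruction, so the logical instruction spans two 2-byte units. A toy model with assumed offsets (xdis.next_offset is the real, version-portable hop, as used above):

    INSTRUCTION_SIZE = 2  # one wordcode unit in CPython 3.6+

    def logical_next_offset(offset: int, has_extended_arg: bool) -> int:
        # Skip the EXTENDED_ARG prefix (if any) plus the real instruction.
        hops = 2 if has_extended_arg else 1
        return offset + hops * INSTRUCTION_SIZE

    print(logical_next_offset(100, True))   # 104
    print(logical_next_offset(100, False))  # 102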
@@ -1082,7 +1100,6 @@ class Scanner3(Scanner):
                 and (target > offset)
                 and pretarget.offset != offset
             ):
-
                 # FIXME: hack upon hack...
                 # In some cases the pretarget can be a jump to the next instruction
                 # and these aren't and/or's either. We limit to 3.5+ since we experienced there
@@ -1104,7 +1121,6 @@ class Scanner3(Scanner):
 
         # Is it an "and" inside an "if" or "while" block
         if op == self.opc.POP_JUMP_IF_FALSE:
-
             # Search for another POP_JUMP_IF_FALSE targeting the same op,
             # in current statement, starting from current offset, and filter
             # everything inside inner 'or' jumps and midline ifs
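The pattern this branch searches for looks like the following on CPython 3.6-3.8 (offsets are version-dependent): in `if a and b:` both tests share one jump target, and the scanner uses that shared target to group the tests into a single condition.

    # >>> import dis
    # >>> dis.dis(compile("if a and b: c = 1", "<s>", "exec"))
    #   0 LOAD_NAME            0 (a)
    #   2 POP_JUMP_IF_FALSE   12
    #   4 LOAD_NAME            1 (b)
    #   6 POP_JUMP_IF_FALSE   12      <- same target as the jump at offset 2
    #   8 LOAD_CONST           0 (1)
    #  10 STORE_NAME           2 (c)
    #  12 LOAD_CONST           1 (None)
    #  14 RETURN_VALUE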
@@ -1357,7 +1373,6 @@ class Scanner3(Scanner):
             self.fixed_jumps[offset] = rtarget
             self.not_continue.add(pre_rtarget)
         else:
-
             # FIXME: this is very convoluted and based on rather hacky
             # empirical evidence. It should go away when
             # we have better control-flow analysis