merge from master

2025-08-03 08:49:51 +08:00 · 2022-05-14 09:40:53 -04:00
parent 4f6d3a3d7e
commit b8856993d2
6 changed files with 65 additions and 43 deletions
--- a/uncompyle6/parsers/parse14.py
+++ b/uncompyle6/parsers/parse14.py
@@ -64,7 +64,7 @@ class Python14Parser(Python15Parser):

            if opname_base == "UNPACK_VARARG":
                if token.attr > 1:
-                    self.addRule(f"star_args ::= RESERVE_FAST {opname} args_store", nop_func)
+                    self.addRule("star_args ::= RESERVE_FAST %s args_store" % opname, nop_func)


    def reduce_is_invalid(self, rule, ast, tokens, first, last):
--- a/uncompyle6/scanner.py
+++ b/uncompyle6/scanner.py
@@ -127,8 +127,8 @@ class Scanner(object):
        # FIXME: This weird Python2 behavior is not Python3
        self.resetTokenClass()

-    def bound_collection(
-        self, tokens: list, next_tokens: list, t: Token, i: int, collection_type: str
+    def bound_collection_from_tokens(
+        self, tokens, t, i, collection_type
    ):
        count = t.attr
        assert isinstance(count, int)
@@ -144,7 +144,7 @@ class Scanner(object):

        # For small lists don't bother
        if count < 5:
-            return next_tokens + [t]
+            return None

        collection_start = i - count

@@ -155,13 +155,13 @@ class Scanner(object):
                "LOAD_GLOBAL",
                "LOAD_NAME",
            ):
-                return next_tokens + [t]
+                return None

        collection_enum = CONST_COLLECTIONS.index(collection_type)

        # If we get here, all instructions before tokens[i] are LOAD_CONST, so we can
        # add a boundary marker and change LOAD_CONST to something else
-        new_tokens = next_tokens[:-count]
+        new_tokens = tokens[:-count]
        start_offset = tokens[collection_start].offset
        new_tokens.append(
            Token(
--- a/uncompyle6/scanners/scanner2.py
+++ b/uncompyle6/scanners/scanner2.py
@@ -200,6 +200,7 @@ class Scanner2(Scanner):
        grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST
        cause specific rules for the specific number of arguments they take.
        """
+
        if not show_asm:
            show_asm = self.show_asm

@@ -1441,3 +1442,15 @@ class Scanner2(Scanner):
            instr_offsets = filtered
            filtered = []
        return instr_offsets
+
+
+if __name__ == "__main__":
+    import inspect
+    from xdis.version_info import PYTHON_VERSION_TRIPLE
+
+    co = inspect.currentframe().f_code
+
+    tokens, customize = Scanner2(PYTHON_VERSION_TRIPLE).ingest(co)
+    for t in tokens:
+            print(t)
+    pass
--- a/uncompyle6/scanners/scanner26.py
+++ b/uncompyle6/scanners/scanner26.py
@@ -26,6 +26,7 @@ import sys
 import uncompyle6.scanners.scanner2 as scan

 # bytecode verification, verify(), uses JUMP_OPs from here
+from xdis import iscode
 from xdis.opcodes import opcode_26
 from xdis.bytecode import _get_const_info

@@ -72,7 +73,7 @@ class Scanner26(scan.Scanner2):
        bytecode = self.build_instructions(co)

        # show_asm = 'after'
-        if show_asm in ('both', 'before'):
+        if show_asm in ("both", "before"):
            for instr in bytecode.get_instructions(co):
                print(instr.disassemble())

@@ -81,7 +82,7 @@ class Scanner26(scan.Scanner2):

        customize = {}
        if self.is_pypy:
-            customize['PyPy'] = 1
+            customize["PyPy"] = 0

        codelen = len(self.code)

@@ -93,6 +94,7 @@ class Scanner26(scan.Scanner2):
        # 'LOAD_ASSERT' is used in assert statements.
        self.load_asserts = set()
        for i in self.op_range(0, codelen):
+
            # We need to detect the difference between:
            #   raise AssertionError
            #  and
@@ -115,9 +117,9 @@ class Scanner26(scan.Scanner2):
                # Distinguish "print ..." from "print ...,"
                if self.code[last_stmt] == self.opc.PRINT_ITEM:
                    if self.code[i] == self.opc.PRINT_ITEM:
-                        replace[i] = 'PRINT_ITEM_CONT'
+                        replace[i] = "PRINT_ITEM_CONT"
                    elif self.code[i] == self.opc.PRINT_NEWLINE:
-                        replace[i] = 'PRINT_NEWLINE_CONT'
+                        replace[i] = "PRINT_NEWLINE_CONT"
            last_stmt = i
            i = self.next_stmt[i]

@@ -181,29 +183,25 @@ class Scanner26(scan.Scanner2):

                if op in self.opc.CONST_OPS:
                    const = co.co_consts[oparg]
-                    # We can't use inspect.iscode() because we may be
-                    # using a different version of Python than the
-                    # one that this was byte-compiled on. So the code
-                    # types may mismatch.
-                    if hasattr(const, 'co_name'):
+                    if iscode(const):
                        oparg = const
-                        if const.co_name == '<lambda>':
-                            assert op_name == 'LOAD_CONST'
-                            op_name = 'LOAD_LAMBDA'
+                        if const.co_name == "<lambda>":
+                            assert op_name == "LOAD_CONST"
+                            op_name = "LOAD_LAMBDA"
                        elif const.co_name == self.genexpr_name:
-                            op_name = 'LOAD_GENEXPR'
-                        elif const.co_name == '<dictcomp>':
-                            op_name = 'LOAD_DICTCOMP'
-                        elif const.co_name == '<setcomp>':
-                            op_name = 'LOAD_SETCOMP'
+                            op_name = "LOAD_GENEXPR"
+                        elif const.co_name == "<dictcomp>":
+                            op_name = "LOAD_DICTCOMP"
+                        elif const.co_name == "<setcomp>":
+                            op_name = "LOAD_SETCOMP"
                        else:
                            op_name = "LOAD_CODE"
-                        # verify uses 'pattr' for comparison, since 'attr'
+                        # verify() uses 'pattr' for comparison, since 'attr'
                        # now holds Code(const) and thus can not be used
                        # for comparison (todo: think about changing this)
-                        # pattr = 'code_object @ 0x%x %s->%s' % \
+                        # pattr = 'code_object @ 0x%x %s->%s' %\
                        # (id(const), const.co_filename, const.co_name)
-                        pattr = '<code_object ' + const.co_name + '>'
+                        pattr = "<code_object " + const.co_name + ">"
                    else:
                        if oparg < len(co.co_consts):
                            argval, _ = _get_const_info(oparg, co.co_consts)
@@ -235,6 +233,7 @@ class Scanner26(scan.Scanner2):
                    pattr = self.opc.cmp_op[oparg]
                elif op in self.opc.FREE_OPS:
                    pattr = free[oparg]
+
            if op in self.varargs_ops:
                # CE - Hack for >= 2.5
                #      Now all values loaded via LOAD_CLOSURE are packed into
@@ -285,25 +284,36 @@ class Scanner26(scan.Scanner2):

            elif op == self.opc.LOAD_GLOBAL:
                if offset in self.load_asserts:
-                    op_name = 'LOAD_ASSERT'
+                    op_name = "LOAD_ASSERT"
            elif op == self.opc.RETURN_VALUE:
                if offset in self.return_end_ifs:
-                    op_name = 'RETURN_END_IF'
+                    op_name = "RETURN_END_IF"

            linestart = self.linestarts.get(offset, None)

            if offset not in replace:
-                tokens.append(Token(
-                    op_name, oparg, pattr, offset, linestart, op,
-                    has_arg, self.opc))
+                tokens.append(
+                    Token(
+                        op_name, oparg, pattr, offset, linestart, op, has_arg, self.opc
+                    )
+                )
            else:
-                tokens.append(Token(
-                    replace[offset], oparg, pattr, offset, linestart, op,
-                    has_arg, self.opc))
+                tokens.append(
+                    Token(
+                        replace[offset],
+                        oparg,
+                        pattr,
+                        offset,
+                        linestart,
+                        op,
+                        has_arg,
+                        self.opc,
+                    )
+                )
                pass
            pass

-        if show_asm in ('both', 'after'):
+        if show_asm in ("both", "after"):
            for t in tokens:
                print(t.format(line_prefix=""))
            print()
--- a/uncompyle6/scanners/scanner37.py
+++ b/uncompyle6/scanners/scanner37.py
@@ -156,7 +156,7 @@ class Scanner37(Scanner37Base):
                    if t.kind.startswith("BUILD_CONST_KEY_MAP")
                    else t.kind.split("_")[1]
                )
-                new_tokens = self.bound_collection(
+                new_tokens = self.bound_collection_from_tokens(
                    tokens, new_tokens, t, i, "CONST_%s" % collection_type
                )
                continue
--- a/uncompyle6/scanners/scanner38.py
+++ b/uncompyle6/scanners/scanner38.py
@@ -22,8 +22,6 @@ This sets up opcodes Python's 3.8 and calls a generalized
 scanner routine for Python 3.7 and up.
 """

-from typing import Dict, Tuple
-
 from uncompyle6.scanners.tok import off2int
 from uncompyle6.scanners.scanner37 import Scanner37
 from uncompyle6.scanners.scanner37base import Scanner37Base
@@ -45,7 +43,7 @@ class Scanner38(Scanner37):

    def ingest(
        self, co, classname=None, code_objects={}, show_asm=None
-    ) -> Tuple[list, dict]:
+    ) -> tuple:
        """
        Create "tokens" from the bytecode of a Python code object. Largely these
        are the opcode name, but in some cases that has been modified to make parsing
@@ -73,7 +71,7 @@ class Scanner38(Scanner37):
        # The value is where the loop ends. In current Python,
        # JUMP_BACKS are always to loops. And blocks are ordered so that the
        # JUMP_BACK with the highest offset will be where the range ends.
-        jump_back_targets: Dict[int, int] = {}
+        jump_back_targets = {}
        for token in tokens:
            if token.kind == "JUMP_BACK":
                jump_back_targets[token.attr] = token.offset
@@ -92,7 +90,7 @@ class Scanner38(Scanner37):
            if offset == next_end:
                loop_ends.pop()
                if self.debug:
-                    print(f"{'  ' * len(loop_ends)}remove loop offset {offset}")
+                    print("%sremove loop offset %s" % (" " * len(loop_ends), offset))
                    pass
                next_end = (
                    loop_ends[-1]
@@ -106,7 +104,8 @@ class Scanner38(Scanner37):
                next_end = off2int(jump_back_targets[offset], prefer_last=False)
                if self.debug:
                    print(
-                        f"{'  ' * len(loop_ends)}adding loop offset {offset} ending at {next_end}"
+                        "%sadding loop offset %s ending at %s"
+                        % ("  " * len(loop_ends), offset, next_end)
                    )
                loop_ends.append(next_end)

@@ -165,4 +164,4 @@ if __name__ == "__main__":
            print(t.format())
        pass
    else:
-        print(f"Need to be Python 3.8 to demo; I am version {version_tuple_to_str()}.")
+        print("Need to be Python 3.8 to demo; I am version %s." % version_tuple_to_str())