Handle long 2.x bytecode literals more efficiently

2025-08-04 01:09:52 +08:00 · 2022-04-27 13:47:56 -04:00
parent cfd6166d8d
commit 8e5faa933f
7 changed files with 153 additions and 15 deletions
--- a/test/bytecode_2.7_run/05_long_literals.pyc
+++ b/test/bytecode_2.7_run/05_long_literals.pyc
--- a/uncompyle6/parsers/parse2.py
+++ b/uncompyle6/parsers/parse2.py
@@ -312,6 +312,14 @@ class Python2Parser(PythonParser):
            opname_base = opname[: opname.rfind("_")]
            if opname in ("BUILD_CONST_LIST", "BUILD_CONST_SET"):
                rule = """
                       add_consts          ::= ADD_VALUE*
                       const_list          ::= COLLECTION_START add_consts %s
                       expr                ::= const_list
                       """ % opname
                self.addRule(rule, nop_func)
            # The order of opname listed is roughly sorted below
            if opname_base in ("BUILD_LIST", "BUILD_SET", "BUILD_TUPLE"):
                # We do this complicated test to speed up parsing of
--- a/uncompyle6/parsers/parse3.py
+++ b/uncompyle6/parsers/parse3.py
@@ -748,18 +748,37 @@ class Python3Parser(PythonParser):
                kvlist_n = "expr " * (token.attr)
                rule = "dict ::= %sLOAD_CONST %s" % (kvlist_n, opname)
                self.addRule(rule, nop_func)
            elif opname in ("BUILD_CONST_LIST", "BUILD_CONST_DICT", "BUILD_CONST_SET"):
                if opname == "BUILD_CONST_DICT":
                    rule = """
                           add_consts          ::= ADD_VALUE*
                           const_list          ::= COLLECTION_START add_consts %s
                           dict                ::= const_list
                           expr                ::= dict
                           """ % opname
                else:
                    rule = """
                           add_consts          ::= ADD_VALUE*
                           const_list          ::= COLLECTION_START add_consts %s
                           expr                ::= const_list
                           """ % opname
                self.addRule(rule, nop_func)
            elif opname.startswith("BUILD_DICT_OLDER"):
                rule = """dict ::= COLLECTION_START key_value_pairs BUILD_DICT_OLDER
                          key_value_pairs ::= key_value_pair+
                          key_value_pair  ::= ADD_KEY ADD_VALUE
                       """
                self.addRule(rule, nop_func)
            elif opname.startswith("BUILD_LIST_UNPACK"):
                v = token.attr
                rule = "build_list_unpack ::= %s%s" % ("expr " * v, opname)
                self.addRule(rule, nop_func)
                rule = "expr ::= build_list_unpack"
                self.addRule(rule, nop_func)
            elif opname_base in ("BUILD_MAP", "BUILD_MAP_UNPACK"):
                kvlist_n = "kvlist_%s" % token.attr
                if opname == "BUILD_MAP_n":
--- a/uncompyle6/scanner.py
+++ b/uncompyle6/scanner.py
@@ -1,4 +1,4 @@
-#  Copyright (c) 2016, 2018-2021 by Rocky Bernstein
+#  Copyright (c) 2016, 2018-2022 by Rocky Bernstein
 #  Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
 #  Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
 #  Copyright (c) 1999 John Aycock
@@ -24,7 +24,6 @@ scanners, e.g. for Python 2.7 or 3.4.
 from typing import Optional
 from array import array
 from collections import namedtuple
 from sys import intern  # noqa
 from uncompyle6.scanners.tok import Token
 from xdis.version_info import IS_PYPY, version_tuple_to_str
@@ -125,6 +124,80 @@ class Scanner(object):
        # FIXME: This weird Python2 behavior is not Python3
        self.resetTokenClass()
    def bound_collection_from_tokens(
        self, tokens, t, i, collection_type
    ):
        """
        Try to rewrite the run of simple load tokens feeding a
        collection-building instruction into a compact, bracketed form.

        When the ``count`` tokens immediately before index ``i`` are all
        LOAD_CONST / LOAD_FAST / LOAD_GLOBAL / LOAD_NAME tokens, a new
        token list is returned in which that run is replaced by::

            COLLECTION_START  ADD_VALUE ... ADD_VALUE  BUILD_<collection_type>

        Parameters:
          tokens: list of tokens accumulated so far; each entry is read
              for ``kind``, ``attr``, ``pattr``, ``offset`` and ``linestart``.
          t: token for the collection-building instruction; ``t.attr`` must
              be an int giving the number of items the instruction takes.
          i: index into ``tokens`` just past the last item token.
              NOTE(review): the retained prefix is computed as
              ``tokens[:-count]``, which matches ``i - count`` only when
              ``i == len(tokens)`` -- confirm that callers pass that.
          collection_type: an entry of CONST_COLLECTIONS, for example
              "CONST_DICT"; it also forms the name of the final token.

        Returns:
          The new token list, or None when the rewrite does not apply
          (fewer than 5 items, not enough tokens before ``i``, or a
          token in the run that is not a simple load).
        """
        count = t.attr
        assert isinstance(count, int)

        assert count <= i

        if collection_type == "CONST_DICT":
            # Constant dictionaries work via BUILD_CONST_KEY_MAP and
            # handle the values() like sets and lists.
            # However the keys() are a LOAD_CONST of the keys.
            # Adjust the count to account for this extra token.
            count += 1

        # For small lists don't bother
        if count < 5:
            return None

        collection_start = i - count

        # The CONST_DICT adjustment above happens after the
        # "count <= i" assertion, so the run can still start before the
        # beginning of the token list. A negative start would silently
        # wrap around to the end of "tokens" when indexing, so bail out.
        if collection_start < 0:
            return None

        load_kinds = ("LOAD_CONST", "LOAD_FAST", "LOAD_GLOBAL", "LOAD_NAME")
        if any(
            tokens[j].kind not in load_kinds for j in range(collection_start, i)
        ):
            return None

        collection_enum = CONST_COLLECTIONS.index(collection_type)

        # If we get here, every token in the run before tokens[i] is a
        # simple load. Drop the run, add a boundary marker, and re-emit
        # each load as an ADD_VALUE token.
        new_tokens = tokens[:-count]
        start_offset = tokens[collection_start].offset
        new_tokens.append(
            Token(
                opname="COLLECTION_START",
                attr=collection_enum,
                pattr=collection_type,
                offset="%s_0" % start_offset,
                has_arg=True,
                opc=self.opc,
                has_extended_arg=False,
            )
        )
        for j in range(collection_start, i):
            item = tokens[j]
            new_tokens.append(
                Token(
                    opname="ADD_VALUE",
                    attr=item.attr,
                    pattr=item.pattr,
                    offset=item.offset,
                    has_arg=True,
                    linestart=item.linestart,
                    opc=self.opc,
                    has_extended_arg=False,
                )
            )
        # The build instruction itself closes the collection; note that it
        # keeps the original (unadjusted) item count in "attr".
        new_tokens.append(
            Token(
                opname="BUILD_%s" % collection_type,
                attr=t.attr,
                pattr=t.pattr,
                offset=t.offset,
                has_arg=t.has_arg,
                linestart=t.linestart,
                opc=t.opc,
                has_extended_arg=False,
            )
        )
        return new_tokens
    def build_instructions(self, co):
        """
        Create a list of instructions (a structured object rather than
--- a/uncompyle6/scanners/scanner2.py
+++ b/uncompyle6/scanners/scanner2.py
@@ -200,7 +200,6 @@ class Scanner2(Scanner):
        grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST
        cause specific rules for the specific number of arguments they take.
        """
        if not show_asm:
            show_asm = self.show_asm
@@ -212,7 +211,7 @@ class Scanner2(Scanner):
                print(instr.disassemble())
        # list of tokens/instructions
-        tokens = []
+        new_tokens = []
        # "customize" is in the process of going away here
        customize = {}
@@ -289,7 +288,7 @@ class Scanner2(Scanner):
                        if come_from_type not in ("LOOP", "EXCEPT"):
                            come_from_name = "COME_FROM_%s" % come_from_type
                        pass
-                    tokens.append(
+                    new_tokens.append(
                        Token(
                            come_from_name,
                            jump_offset,
@@ -313,6 +312,24 @@ class Scanner2(Scanner):
                if op == self.opc.EXTENDED_ARG:
                    extended_arg += self.extended_arg_val(oparg)
                    continue
                # Note: name used to match on rather than op since
                # BUILD_SET isn't in earlier Pythons.
                if op_name in (
                    "BUILD_LIST",
                    "BUILD_SET",
                ):
                    t = Token(
                        op_name, oparg, pattr, offset, self.linestarts.get(offset, None), op, has_arg, self.opc
                    )
                    collection_type = op_name.split("_")[1]
                    next_tokens = self.bound_collection_from_tokens(
                        new_tokens, t, len(new_tokens), "CONST_%s" % collection_type
                    )
                    if next_tokens is not None:
                        new_tokens = next_tokens
                        continue
                if op in self.opc.CONST_OPS:
                    const = co.co_consts[oparg]
                    if iscode(const):
@@ -347,12 +364,12 @@ class Scanner2(Scanner):
                elif op in self.opc.JREL_OPS:
                    #  use instead: hasattr(self, 'patch_continue'): ?
                    if self.version[:2] == (2, 7):
-                        self.patch_continue(tokens, offset, op)
+                        self.patch_continue(new_tokens, offset, op)
                    pattr = repr(offset + 3 + oparg)
                elif op in self.opc.JABS_OPS:
                    # use instead: hasattr(self, 'patch_continue'): ?
                    if self.version[:2] == (2, 7):
-                        self.patch_continue(tokens, offset, op)
+                        self.patch_continue(new_tokens, offset, op)
                    pattr = repr(oparg)
                elif op in self.opc.LOCAL_OPS:
                    pattr = varnames[oparg]
@@ -433,13 +450,13 @@ class Scanner2(Scanner):
            linestart = self.linestarts.get(offset, None)
            if offset not in replace:
-                tokens.append(
+                new_tokens.append(
                    Token(
                        op_name, oparg, pattr, offset, linestart, op, has_arg, self.opc
                    )
                )
            else:
-                tokens.append(
+                new_tokens.append(
                    Token(
                        replace[offset],
                        oparg,
@@ -455,10 +472,10 @@ class Scanner2(Scanner):
            pass
        if show_asm in ("both", "after"):
-            for t in tokens:
+            for t in new_tokens:
                print(t.format(line_prefix=""))
            print()
-        return tokens, customize
+        return new_tokens, customize
    def build_statement_indices(self):
        code = self.code
--- a/uncompyle6/scanners/scanner26.py
+++ b/uncompyle6/scanners/scanner26.py
@@ -123,7 +123,9 @@ class Scanner26(scan.Scanner2):
            i = self.next_stmt[i]
        extended_arg = 0
        i = -1
        for offset in self.op_range(0, codelen):
            i += 1
            op = self.code[offset]
            op_name = self.opname[op]
            oparg = None; pattr = None
@@ -156,8 +158,28 @@ class Scanner26(scan.Scanner2):
                oparg = self.get_argument(offset) + extended_arg
                extended_arg = 0
                if op == self.opc.EXTENDED_ARG:
-                    extended_arg = oparg * L65536
+                     extended_arg += self.extended_arg_val(oparg)
-                    continue
+                     continue
                # Note: name used to match on rather than op since
                # BUILD_SET isn't in earlier Pythons.
                if op_name in (
                    "BUILD_LIST",
                    "BUILD_SET",
                ):
                    t = Token(
                        op_name, oparg, pattr, offset, self.linestarts.get(offset, None), op, has_arg, self.opc
                    )
                    collection_type = op_name.split("_")[1]
                    next_tokens = self.bound_collection_from_tokens(
                        tokens, t, i, "CONST_%s" % collection_type
                    )
                    if next_tokens is not None:
                        tokens = next_tokens
                        continue
                if op in self.opc.CONST_OPS:
                    const = co.co_consts[oparg]
                    # We can't use inspect.iscode() because we may be
--- a/uncompyle6/scanners/scanner37.py
+++ b/uncompyle6/scanners/scanner37.py
@@ -24,8 +24,7 @@ scanner routine for Python 3.
 from typing import Tuple
-from uncompyle6.scanner import CONST_COLLECTIONS
+from uncompyle6.scanner import CONST_COLLECTIONS, Token
 from uncompyle6.scanners.tok import Token
 from uncompyle6.scanners.scanner37base import Scanner37Base
 # bytecode verification, verify(), uses JUMP_OPs from here