Merge branch 'master' into python-3.3-to-3.5

2025-08-04 01:09:52 +08:00 · 2022-04-27 04:00:21 -04:00
parent 3471d11dd5 a356a8e0ee
commit e930c9c6ef
9 changed files with 150 additions and 8 deletions
--- a/test/bytecode_3.3_run/05_long_literals.pyc
+++ b/test/bytecode_3.3_run/05_long_literals.pyc
--- a/test/test_pyenvlib.py
+++ b/test/test_pyenvlib.py
@@ -31,8 +31,6 @@ import xdis.magics as magics
 # ----- configure this for your needs
 python_versions = [v for v in magics.python_versions if re.match("^[0-9.]+$", v)]
 print(python_versions)
 sys.exit(0)
 # FIXME: we should remove Python versions that we don't support.
 # These include Jython, and Python bytecode changes pre release.
--- a/uncompyle6/bin/pydisassemble.py
+++ b/uncompyle6/bin/pydisassemble.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 # Mode: -*- python -*-
 #
-# Copyright (c) 2015-2016, 2018, 2020 by Rocky Bernstein <rb@dustyfeet.com>
+# Copyright (c) 2015-2016, 2018, 2020, 2022 by Rocky Bernstein <rb@dustyfeet.com>
 #
 from __future__ import print_function
 import sys, os, getopt
@@ -16,11 +16,18 @@ Usage:
  {0} [OPTIONS]... FILE
  {0} [--help | -h | -V | --version]
-Disassemble FILE with the instruction mangling that is done to
+Disassemble/Tokenize FILE in the way that is done to
 assist uncompyle6 in parsing the instruction stream. For example
 instructions with variable-length arguments like CALL_FUNCTION and
 BUILD_LIST have argument counts appended to the instruction name, and
-COME_FROM instructions are inserted into the instruction stream.
+COME_FROM pseudo instructions are inserted into the instruction stream.
 Bit flag values encoded in an operand are expanded, and EXTENDED_ARG
 values are folded into the following instruction operand.
 Like the parser, you may find this more high-level and or helpful.
 However if you want a true disassembler see the Standard built-in
 Python library module "dis", or pydisasm from the cross-version
 Python bytecode package "xdis".
 Examples:
  {0} foo.pyc
--- a/uncompyle6/parser.py
+++ b/uncompyle6/parser.py
@@ -61,6 +61,7 @@ class PythonParser(GenericASTBuilder):
            "except_stmts",
            "exprlist",
            "importlist",
            "key_value_pairs",
            "kvlist",
            "kwargs",
            "l_stmts",
--- a/uncompyle6/parsers/parse3.py
+++ b/uncompyle6/parsers/parse3.py
@@ -748,6 +748,12 @@ class Python3Parser(PythonParser):
                kvlist_n = "expr " * (token.attr)
                rule = "dict ::= %sLOAD_CONST %s" % (kvlist_n, opname)
                self.addRule(rule, nop_func)
            elif opname.startswith("BUILD_DICT_OLDER"):
                rule = """dict ::= COLLECTION_START key_value_pairs BUILD_DICT_OLDER
                          key_value_pairs ::= key_value_pair+
                          key_value_pair  ::= ADD_KEY ADD_VALUE
                       """
                self.addRule(rule, nop_func)
            elif opname.startswith("BUILD_LIST_UNPACK"):
                v = token.attr
                rule = "build_list_unpack ::= %s%s" % ("expr " * v, opname)
--- a/uncompyle6/scanner.py
+++ b/uncompyle6/scanner.py
@@ -84,7 +84,7 @@ def long(num):
    return num
-CONST_COLLECTIONS = ("CONST_LIST", "CONST_SET", "CONST_DICT")
+CONST_COLLECTIONS = ("CONST_LIST", "CONST_SET", "CONST_DICT", "CONST_MAP")
 class Code(object):
--- a/uncompyle6/scanners/scanner3.py
+++ b/uncompyle6/scanners/scanner3.py
@@ -208,6 +208,10 @@ class Scanner3(Scanner):
    def bound_collection_from_inst(
        self, insts: list, next_tokens: list, inst: Instruction, t: Token, i: int, collection_type: str
    ):
        """
        Try to turn a sequence of instructions that ends with a BUILD_xxx into a sequence that can
        be parsed much faster, by inserting a token boundary at the beginning of the sequence.
        """
        count = t.attr
        assert isinstance(count, int)
@@ -228,10 +232,13 @@ class Scanner3(Scanner):
        for j in range(collection_start, i):
            if insts[j].opname not in (
                "LOAD_ASSERT",
                "LOAD_CODE",
                "LOAD_CONST",
                "LOAD_FAST",
                "LOAD_GLOBAL",
                "LOAD_NAME",
                "LOAD_STR",
            ):
                return None
@@ -280,6 +287,94 @@ class Scanner3(Scanner):
        )
        return new_tokens
    def bound_map_from_inst(
        self, insts: list, next_tokens: list, inst: Instruction, t: Token, i: int):
        """
        Try to turn a run of LOAD_CONST instructions that ends with a BUILD_MAP
        into a token sequence that can be parsed much faster, by inserting a
        COLLECTION_START boundary token at the beginning of the sequence.

        On success the key and value loads are re-emitted as alternating
        ADD_KEY / ADD_VALUE tokens, and the BUILD_MAP itself is re-emitted as
        BUILD_DICT_OLDER.

        :param insts: instruction list; the ``2 * count`` entries before
            index ``i`` are examined.
        :param next_tokens: tokens produced so far. Its last ``2 * count``
            entries are dropped and replaced by the new tokens.
            NOTE(review): this presumes one token was emitted for each of
            the preceding instructions -- confirm against the caller.
        :param inst: the instruction being processed (not used here).
        :param t: token for the BUILD_MAP; ``t.attr`` is taken as the number
            of key/value pairs.
        :param i: index in ``insts`` of the instruction being processed,
            presumably the BUILD_MAP itself.
        :returns: the rewritten token list, or ``None`` when the sequence
            does not qualify and should be tokenized the ordinary way.
        """
        count = t.attr
        assert isinstance(count, int)

        # For small maps don't bother
        if count < 5:
            return None

        # Every entry needs a key load and a value load, so 2 * count
        # instructions must precede position i. Bail out when they do not,
        # rather than failing an assertion (or, with asserts stripped,
        # computing a negative start and indexing insts from its end).
        span = 2 * count
        if span > i:
            return None

        collection_start = i - span

        # Only pure-constant maps qualify: every key and every value must
        # have been pushed by a LOAD_CONST.
        if any(
            insts[j].opname != "LOAD_CONST" for j in range(collection_start, i)
        ):
            return None

        collection_enum = CONST_COLLECTIONS.index("CONST_MAP")

        # If we get here, all of the map's operands are LOAD_CONST. Drop their
        # tokens, add a boundary marker, and re-emit each LOAD_CONST as
        # ADD_KEY or ADD_VALUE.
        new_tokens = next_tokens[:-span]
        start_offset = insts[collection_start].offset
        new_tokens.append(
            Token(
                opname="COLLECTION_START",
                attr=collection_enum,
                pattr="CONST_MAP",
                offset="%s_0" % start_offset,
                linestart=False,
                has_arg=True,
                has_extended_arg=False,
                opc=self.opc,
            )
        )

        for j in range(collection_start, i):
            load = insts[j]
            # Operands alternate key, value, key, value, ... from the start.
            is_key = (j - collection_start) % 2 == 0
            new_tokens.append(
                Token(
                    opname="ADD_KEY" if is_key else "ADD_VALUE",
                    attr=load.argval,
                    pattr=load.argrepr,
                    offset=load.offset,
                    linestart=load.starts_line,
                    has_arg=True,
                    has_extended_arg=False,
                    opc=self.opc,
                )
            )

        # Close the collection with a renamed copy of the BUILD_MAP token.
        new_tokens.append(
            Token(
                opname="BUILD_DICT_OLDER",
                attr=t.attr,
                pattr=t.pattr,
                offset=t.offset,
                linestart=t.linestart,
                has_arg=t.has_arg,
                has_extended_arg=False,
                opc=t.opc,
            )
        )
        return new_tokens
    def ingest(self, co, classname=None, code_objects={}, show_asm=None
        ):
        """
@@ -406,6 +501,15 @@ class Scanner3(Scanner):
                if try_tokens is not None:
                    new_tokens = try_tokens
                    continue
            elif opname in (
                "BUILD_MAP",
            ):
                try_tokens = self.bound_map_from_inst(
                    self.insts, new_tokens, inst, t, i,
                )
                if try_tokens is not None:
                    new_tokens = try_tokens
                    continue
            argval = inst.argval
            op = inst.opcode
--- a/uncompyle6/scanners/scanner37.py
+++ b/uncompyle6/scanners/scanner37.py
@@ -63,10 +63,12 @@ class Scanner37(Scanner37Base):
        for j in range(collection_start, i):
            if tokens[j].kind not in (
                "LOAD_CODE",
                "LOAD_CONST",
                "LOAD_FAST",
                "LOAD_GLOBAL",
                "LOAD_NAME",
                "LOAD_STR",
            ):
                return next_tokens + [t]
--- a/uncompyle6/semantics/n_actions.py
+++ b/uncompyle6/semantics/n_actions.py
@@ -244,8 +244,6 @@ class NonterminalActions:
                sep = ","
        else:
            for elem in flat_elems:
                if elem.kind != "ADD_VALUE":
                    from trepan.api import debug; debug()
                assert elem.kind == "ADD_VALUE"
                value = elem.pattr
                if elem.linestart is not None:
@@ -395,6 +393,32 @@ class NonterminalActions:
                template = ("**%C", (0, kwargs, ", **"))
                self.template_engine(template, node)
                sep = ""
            if node[0].kind == "COLLECTION_START":
                key_value_pairs = node[1]
                for key_value_pair in key_value_pairs:
                    key, value = key_value_pair
                    if key.linestart is not None:
                        line_number = key.linestart
                    if line_number != self.line_number:
                        sep += "\n" + self.indent + INDENT_PER_LEVEL[:-1]
                        self.line_number = line_number
                    self.write(sep)
                    self.write(key.pattr)
                    self.write(": ")
                    if value.linestart is not None:
                        line_number = value.linestart
                    if line_number != self.line_number:
                        sep += "\n" + self.indent + INDENT_PER_LEVEL[:-1]
                        self.line_number = line_number
                    else:
                        sep += " "
                        pass
                    self.write(value.pattr)
                    sep = ", "
                    pass
                if sep.startswith(",\n"):
                    self.write(sep[1:])
                pass
            pass
        else: