Start handling BUILD_MAP (a class of dict)

2025-08-03 00:45:53 +08:00 · 2022-04-26 15:37:42 -04:00
parent 81ff994a41
commit c25fa61e33
5 changed files with 135 additions and 3 deletions
--- a/uncompyle6/parser.py
+++ b/uncompyle6/parser.py
@@ -61,6 +61,7 @@ class PythonParser(GenericASTBuilder):
            "except_stmts",
            "exprlist",
            "importlist",
            "key_value_pairs",
            "kvlist",
            "kwargs",
            "l_stmts",
--- a/uncompyle6/parsers/parse3.py
+++ b/uncompyle6/parsers/parse3.py
@@ -748,6 +748,12 @@ class Python3Parser(PythonParser):
                kvlist_n = "expr " * (token.attr)
                rule = "dict ::= %sLOAD_CONST %s" % (kvlist_n, opname)
                self.addRule(rule, nop_func)
            elif opname.startswith("BUILD_DICT_OLDER"):
                rule = """dict ::= COLLECTION_START key_value_pairs BUILD_DICT_OLDER
                          key_value_pairs ::= key_value_pair+
                          key_value_pair  ::= ADD_KEY ADD_VALUE
                       """
                self.addRule(rule, nop_func)
            elif opname.startswith("BUILD_LIST_UNPACK"):
                v = token.attr
                rule = "build_list_unpack ::= %s%s" % ("expr " * v, opname)
--- a/uncompyle6/scanner.py
+++ b/uncompyle6/scanner.py
@@ -84,7 +84,7 @@ def long(num):
    return num
-CONST_COLLECTIONS = ("CONST_LIST", "CONST_SET", "CONST_DICT")
+CONST_COLLECTIONS = ("CONST_LIST", "CONST_SET", "CONST_DICT", "CONST_MAP")
 class Code(object):
--- a/uncompyle6/scanners/scanner3.py
+++ b/uncompyle6/scanners/scanner3.py
@@ -210,6 +210,10 @@ class Scanner3(Scanner):
    def bound_collection_from_inst(
        self, insts: list, next_tokens: list, inst: Instruction, t: Token, i: int, collection_type: str
    ) -> Optional[list]:
        """
        Try to a sequence of instruction that ends with a BUILD_xxx into a sequence that can
        be parsed much faster, but inserting the token boundary at the beginning of the sequence.
        """
        count = t.attr
        assert isinstance(count, int)
@@ -285,6 +289,94 @@ class Scanner3(Scanner):
        )
        return new_tokens
    def bound_map_from_inst(
        self, insts: list, next_tokens: list, inst: Instruction, t: Token, i: int) -> Optional[list]:
        """
        Try to turn a sequence of instructions that ends with a BUILD_MAP into
        a token sequence that can be parsed much faster, by inserting a
        COLLECTION_START boundary token at the beginning of the sequence.

        The rewrite is attempted only when the BUILD_MAP at ``insts[i]`` has at
        least 5 key/value pairs and the ``2 * count`` instructions immediately
        before it are all LOAD_CONST.

        :param insts: instruction list; ``insts[i]`` is the BUILD_MAP.
        :param next_tokens: tokens accumulated so far. Its last ``2 * count``
            entries are dropped and replaced (presumably they are the tokens
            produced for the LOAD_CONST instructions -- verify against caller).
        :param inst: the BUILD_MAP instruction (not used here).
        :param t: token for the BUILD_MAP; ``t.attr`` is the pair count.
        :param i: index of the BUILD_MAP in ``insts``.
        :return: a new token list of the form
            ``COLLECTION_START (ADD_KEY ADD_VALUE)* BUILD_DICT_OLDER``
            appended to the kept prefix of ``next_tokens``, or ``None`` when
            the instruction sequence does not qualify.
        """
        count = t.attr
        assert isinstance(count, int)

        # For small dicts don't bother
        if count < 5:
            return None

        # BUILD_MAP's count is in key/value pairs, so twice that many
        # instructions have to precede it. Decline (rather than assert) when
        # there are not enough; a negative start index would otherwise wrap
        # around to the end of insts.
        n_loads = 2 * count
        if n_loads > i:
            return None
        collection_start = i - n_loads

        if any(
            insts[j].opname != "LOAD_CONST" for j in range(collection_start, i)
        ):
            return None

        collection_enum = CONST_COLLECTIONS.index("CONST_MAP")

        # If we get here, all 2 * count instructions before insts[i] are
        # LOAD_CONST: add a boundary marker and change each LOAD_CONST into
        # ADD_KEY or ADD_VALUE.
        new_tokens = next_tokens[:-n_loads]
        start_offset = insts[collection_start].offset
        new_tokens.append(
            Token(
                opname="COLLECTION_START",
                attr=collection_enum,
                pattr="CONST_MAP",
                offset=f"{start_offset}_0",
                linestart=False,
                has_arg=True,
                has_extended_arg=False,
                opc=self.opc,
            )
        )
        for j in range(collection_start, i, 2):
            # Instructions alternate key, value, key, value, ...
            for opname, load in (("ADD_KEY", insts[j]), ("ADD_VALUE", insts[j + 1])):
                new_tokens.append(
                    Token(
                        opname=opname,
                        attr=load.argval,
                        pattr=load.argrepr,
                        offset=load.offset,
                        linestart=load.starts_line,
                        has_arg=True,
                        has_extended_arg=False,
                        opc=self.opc,
                    )
                )
        # Close the collection with a renamed copy of the BUILD_MAP token.
        new_tokens.append(
            Token(
                opname="BUILD_DICT_OLDER",
                attr=t.attr,
                pattr=t.pattr,
                offset=t.offset,
                linestart=t.linestart,
                has_arg=t.has_arg,
                has_extended_arg=False,
                opc=t.opc,
            )
        )
        return new_tokens
    def ingest(self, co, classname=None, code_objects={}, show_asm=None
        ) -> Tuple[list, dict]:
        """
@@ -411,6 +503,15 @@ class Scanner3(Scanner):
                if try_tokens is not None:
                    new_tokens = try_tokens
                    continue
            elif opname in (
                "BUILD_MAP",
            ):
                try_tokens = self.bound_map_from_inst(
                    self.insts, new_tokens, inst, t, i,
                )
                if try_tokens is not None:
                    new_tokens = try_tokens
                    continue
            argval = inst.argval
            op = inst.opcode
--- a/uncompyle6/semantics/n_actions.py
+++ b/uncompyle6/semantics/n_actions.py
@@ -244,8 +244,6 @@ class NonterminalActions:
                sep = ","
        else:
            for elem in flat_elems:
                if elem.kind != "ADD_VALUE":
                    from trepan.api import debug; debug()
                assert elem.kind == "ADD_VALUE"
                value = elem.pattr
                if elem.linestart is not None:
@@ -395,6 +393,32 @@ class NonterminalActions:
                template = ("**%C", (0, kwargs, ", **"))
                self.template_engine(template, node)
                sep = ""
            if node[0].kind == "COLLECTION_START":
                key_value_pairs = node[1]
                for key_value_pair in key_value_pairs:
                    key, value = key_value_pair
                    if key.linestart is not None:
                        line_number = key.linestart
                    if line_number != self.line_number:
                        sep += "\n" + self.indent + INDENT_PER_LEVEL[:-1]
                        self.line_number = line_number
                    self.write(sep)
                    self.write(key.pattr)
                    self.write(": ")
                    if value.linestart is not None:
                        line_number = value.linestart
                    if line_number != self.line_number:
                        sep += "\n" + self.indent + INDENT_PER_LEVEL[:-1]
                        self.line_number = line_number
                    else:
                        sep += " "
                        pass
                    self.write(value.pattr)
                    sep = ", "
                    pass
                if sep.startswith(",\n"):
                    self.write(sep[1:])
                pass
            pass
        else: