From c25fa61e330fd79c2361674b4265c96a6aacd6f5 Mon Sep 17 00:00:00 2001 From: rocky Date: Tue, 26 Apr 2022 15:37:42 -0400 Subject: [PATCH] Start handling BUILD_MAP (a class of dict) --- uncompyle6/parser.py | 1 + uncompyle6/parsers/parse3.py | 6 ++ uncompyle6/scanner.py | 2 +- uncompyle6/scanners/scanner3.py | 101 ++++++++++++++++++++++++++++++ uncompyle6/semantics/n_actions.py | 28 ++++++++- 5 files changed, 135 insertions(+), 3 deletions(-) diff --git a/uncompyle6/parser.py b/uncompyle6/parser.py index e3bbfd17..785b0a45 100644 --- a/uncompyle6/parser.py +++ b/uncompyle6/parser.py @@ -61,6 +61,7 @@ class PythonParser(GenericASTBuilder): "except_stmts", "exprlist", "importlist", + "key_value_pairs", "kvlist", "kwargs", "l_stmts", diff --git a/uncompyle6/parsers/parse3.py b/uncompyle6/parsers/parse3.py index 039a51db..19f84e71 100644 --- a/uncompyle6/parsers/parse3.py +++ b/uncompyle6/parsers/parse3.py @@ -748,6 +748,12 @@ class Python3Parser(PythonParser): kvlist_n = "expr " * (token.attr) rule = "dict ::= %sLOAD_CONST %s" % (kvlist_n, opname) self.addRule(rule, nop_func) + elif opname.startswith("BUILD_DICT_OLDER"): + rule = """dict ::= COLLECTION_START key_value_pairs BUILD_DICT_OLDER + key_value_pairs ::= key_value_pair+ + key_value_pair ::= ADD_KEY ADD_VALUE + """ + self.addRule(rule, nop_func) elif opname.startswith("BUILD_LIST_UNPACK"): v = token.attr rule = "build_list_unpack ::= %s%s" % ("expr " * v, opname) diff --git a/uncompyle6/scanner.py b/uncompyle6/scanner.py index 87b6fc3b..7ef09b6b 100644 --- a/uncompyle6/scanner.py +++ b/uncompyle6/scanner.py @@ -84,7 +84,7 @@ def long(num): return num -CONST_COLLECTIONS = ("CONST_LIST", "CONST_SET", "CONST_DICT") +CONST_COLLECTIONS = ("CONST_LIST", "CONST_SET", "CONST_DICT", "CONST_MAP") class Code(object): diff --git a/uncompyle6/scanners/scanner3.py b/uncompyle6/scanners/scanner3.py index f20803a2..4e7333b9 100644 --- a/uncompyle6/scanners/scanner3.py +++ b/uncompyle6/scanners/scanner3.py @@ -210,6 +210,10 @@ class Scanner3(Scanner): def bound_collection_from_inst( self, insts: list, next_tokens: list, inst: Instruction, t: Token, i: int, collection_type: str ) -> Optional[list]: + """ + Try to a sequence of instruction that ends with a BUILD_xxx into a sequence that can + be parsed much faster, but inserting the token boundary at the beginning of the sequence. + """ count = t.attr assert isinstance(count, int) @@ -285,6 +289,94 @@ class Scanner3(Scanner): ) return new_tokens + def bound_map_from_inst( + self, insts: list, next_tokens: list, inst: Instruction, t: Token, i: int) -> Optional[list]: + """ + Try to a sequence of instruction that ends with a BUILD_MAP into a sequence that can + be parsed much faster, but inserting the token boundary at the beginning of the sequence. + """ + count = t.attr + assert isinstance(count, int) + if count > i: + return None + + # For small lists don't bother + if count < 5: + return None + + collection_start = i - (count * 2) + assert (count * 2) <= i + + for j in range(collection_start, i, 2): + if insts[j].opname not in ( + "LOAD_CONST", + ): + return None + if insts[j+1].opname not in ( + "LOAD_CONST", + ): + return None + + collection_start = i - (2 * count) + collection_enum = CONST_COLLECTIONS.index("CONST_MAP") + + # If we get here, all instructions before tokens[i] are LOAD_CONST and we can replace + # add a boundary marker and change LOAD_CONST to something else + new_tokens = next_tokens[:-(2*count)] + start_offset = insts[collection_start].offset + new_tokens.append( + Token( + opname="COLLECTION_START", + attr=collection_enum, + pattr="CONST_MAP", + offset=f"{start_offset}_0", + linestart=False, + has_arg=True, + has_extended_arg=False, + opc=self.opc, + ) + ) + for j in range(collection_start, i, 2): + new_tokens.append( + Token( + opname="ADD_KEY", + attr=insts[j].argval, + pattr=insts[j].argrepr, + offset=insts[j].offset, + linestart=insts[j].starts_line, + has_arg=True, + has_extended_arg=False, + opc=self.opc, + ) + ) + new_tokens.append( + Token( + opname="ADD_VALUE", + attr=insts[j+1].argval, + pattr=insts[j+1].argrepr, + offset=insts[j+1].offset, + linestart=insts[j+1].starts_line, + has_arg=True, + has_extended_arg=False, + opc=self.opc, + ) + ) + new_tokens.append( + Token( + opname=f"BUILD_DICT_OLDER", + attr=t.attr, + pattr=t.pattr, + offset=t.offset, + linestart=t.linestart, + has_arg=t.has_arg, + has_extended_arg=False, + opc=t.opc, + ) + ) + return new_tokens + + + def ingest(self, co, classname=None, code_objects={}, show_asm=None ) -> Tuple[list, dict]: """ @@ -411,6 +503,15 @@ class Scanner3(Scanner): if try_tokens is not None: new_tokens = try_tokens continue + elif opname in ( + "BUILD_MAP", + ): + try_tokens = self.bound_map_from_inst( + self.insts, new_tokens, inst, t, i, + ) + if try_tokens is not None: + new_tokens = try_tokens + continue argval = inst.argval op = inst.opcode diff --git a/uncompyle6/semantics/n_actions.py b/uncompyle6/semantics/n_actions.py index 30cb0fe0..679266a4 100644 --- a/uncompyle6/semantics/n_actions.py +++ b/uncompyle6/semantics/n_actions.py @@ -244,8 +244,6 @@ class NonterminalActions: sep = "," else: for elem in flat_elems: - if elem.kind != "ADD_VALUE": - from trepan.api import debug; debug() assert elem.kind == "ADD_VALUE" value = elem.pattr if elem.linestart is not None: @@ -395,6 +393,32 @@ class NonterminalActions: template = ("**%C", (0, kwargs, ", **")) self.template_engine(template, node) sep = "" + if node[0].kind == "COLLECTION_START": + key_value_pairs = node[1] + for key_value_pair in key_value_pairs: + key, value = key_value_pair + if key.linestart is not None: + line_number = key.linestart + if line_number != self.line_number: + sep += "\n" + self.indent + INDENT_PER_LEVEL[:-1] + self.line_number = line_number + self.write(sep) + self.write(key.pattr) + self.write(": ") + if value.linestart is not None: + line_number = value.linestart + if line_number != self.line_number: + sep += "\n" + self.indent + INDENT_PER_LEVEL[:-1] + self.line_number = line_number + else: + sep += " " + pass + self.write(value.pattr) + sep = ", " + pass + if sep.startswith(",\n"): + self.write(sep[1:]) + pass pass else: