diff --git a/test/bytecode_3.8_run/05_long_literals.pyc b/test/bytecode_3.8_run/05_long_literals.pyc new file mode 100644 index 00000000..f952df75 Binary files /dev/null and b/test/bytecode_3.8_run/05_long_literals.pyc differ diff --git a/test/simple_source/expression/05_long_literals.py b/test/simple_source/expression/05_long_literals.py index e67b3006..f9589a7e 100644 --- a/test/simple_source/expression/05_long_literals.py +++ b/test/simple_source/expression/05_long_literals.py @@ -1306,7 +1306,7 @@ assert tuple(x.keys()) == (1, 3) # Try a long dictionary. # This should not be slow as it has been in the past values = { - "valuea": a + 1, + "value1": x, "value2": 2 + 1, "value3": 3 + 1, "value4": 4 + 1, @@ -1811,3 +1811,46 @@ values = { } assert list(values.values()) == list(range(2, 502 + 2)) + + +# Try a long dictionary that fails because we have a binary op. +# We can get an expr32 grouping speedup +# which is slower than if this were all constant. +# The above was not implemented at the time this test was written. 
+values = { + "value1": x + 1, # This is a binary op not constant + "value2": 2, + "value3": 3, + "value4": 4, + "value5": 5, + "value6": 6, + "value7": 7, + "value8": 8, + "value9": 9, + "value10": 10, + "value11": 11, + "value12": 12, + "value13": 13, + "value14": 14, + "value15": 15, + "value16": 16, + "value17": 17, + "value18": 18, + "value19": 19, + "value20": 20, + "value21": 21, + "value22": 22, + "value23": 23, + "value24": 24, + "value25": 25, + "value26": 26, + "value27": 27, + "value28": 28, + "value29": 29, + "value30": 30, + "value31": 31, + "value32": 32, + "value33": 33, +} + +assert list(values.values())[1:] == list(range(2, 34)) diff --git a/uncompyle6/scanner.py b/uncompyle6/scanner.py index 7baf8fb5..30dcab2d 100644 --- a/uncompyle6/scanner.py +++ b/uncompyle6/scanner.py @@ -84,6 +84,9 @@ def long(num): return num +CONST_COLLECTIONS = ("CONST_LIST", "CONST_SET", "CONST_DICT") + + class Code(object): """ Class for representing code-objects. @@ -122,6 +125,80 @@ class Scanner(object): # FIXME: This weird Python2 behavior is not Python3 self.resetTokenClass() + def bound_collection( + self, tokens: list, next_tokens: list, t: Token, i: int, collection_type: str + ): + count = t.attr + assert isinstance(count, int) + + assert count <= i + + if collection_type == "CONST_DICT": + # constant dictionaries work via BUILD_CONST_KEY_MAP and + # handle the values() like sets and lists. + # However the keys() are an LOAD_CONST of the keys. 
+ # adjust offset to account for this + count += 1 + + # For small lists don't bother + if count < 5: + return next_tokens + [t] + + collection_start = i - count + + for j in range(collection_start, i): + if tokens[j].kind not in ( + "LOAD_CONST", + "LOAD_FAST", + "LOAD_GLOBAL", + "LOAD_NAME", + ): + return next_tokens + [t] + + collection_enum = CONST_COLLECTIONS.index(collection_type) + + # If we go there all instructions before tokens[i] are LOAD_CONST and we can replace + # add a boundary marker and change LOAD_CONST to something else + new_tokens = next_tokens[:-count] + start_offset = tokens[collection_start].offset + new_tokens.append( + Token( + opname="COLLECTION_START", + attr=collection_enum, + pattr=collection_type, + offset=f"{start_offset}_0", + has_arg=True, + opc=self.opc, + has_extended_arg=False, + ) + ) + for j in range(collection_start, i): + new_tokens.append( + Token( + opname="ADD_VALUE", + attr=tokens[j].attr, + pattr=tokens[j].pattr, + offset=tokens[j].offset, + has_arg=True, + linestart=tokens[j].linestart, + opc=self.opc, + has_extended_arg=False, + ) + ) + new_tokens.append( + Token( + opname=f"BUILD_{collection_type}", + attr=t.attr, + pattr=t.pattr, + offset=t.offset, + has_arg=t.has_arg, + linestart=t.linestart, + opc=t.opc, + has_extended_arg=False, + ) + ) + return new_tokens + def build_instructions(self, co): """ Create a list of instructions (a structured object rather than diff --git a/uncompyle6/scanners/scanner37base.py b/uncompyle6/scanners/scanner37base.py index bb6061bb..2c6410e1 100644 --- a/uncompyle6/scanners/scanner37base.py +++ b/uncompyle6/scanners/scanner37base.py @@ -47,9 +47,6 @@ import sys globals().update(op3.opmap) -CONST_COLLECTIONS = ("CONST_LIST", "CONST_SET", "CONST_DICT") - - class Scanner37Base(Scanner): def __init__(self, version, show_asm=None, is_pypy=False): super(Scanner37Base, self).__init__(version, show_asm, is_pypy) @@ -184,80 +181,6 @@ class Scanner37Base(Scanner): # self.varargs_ops = 
frozenset(self.opc.hasvargs) return - def bound_collection( - self, tokens: list, next_tokens: list, t: Token, i: int, collection_type: str - ): - count = t.attr - assert isinstance(count, int) - - assert count <= i - - if collection_type == "CONST_DICT": - # constant dictonaries work via BUILD_CONST_KEY_MAP and - # handle the values() like sets and lists. - # However the keys() are an LOAD_CONST of the keys. - # adjust offset to account for this - count += 1 - - # For small lists don't bother - if count < 5: - return next_tokens + [t] - - collection_start = i - count - - for j in range(collection_start, i): - if tokens[j].kind not in ( - "LOAD_CONST", - "LOAD_FAST", - "LOAD_GLOBAL", - "LOAD_NAME", - ): - return next_tokens + [t] - - collection_enum = CONST_COLLECTIONS.index(collection_type) - - # If we go there all instructions before tokens[i] are LOAD_CONST and we can replace - # add a boundary marker and change LOAD_CONST to something else - new_tokens = next_tokens[:-count] - start_offset = tokens[collection_start].offset - new_tokens.append( - Token( - opname="COLLECTION_START", - attr=collection_enum, - pattr=collection_type, - offset=f"{start_offset}_0", - has_arg=True, - opc=self.opc, - has_extended_arg=False, - ) - ) - for j in range(collection_start, i): - new_tokens.append( - Token( - opname="ADD_VALUE", - attr=tokens[j].attr, - pattr=tokens[j].pattr, - offset=tokens[j].offset, - has_arg=True, - linestart=tokens[j].linestart, - opc=self.opc, - has_extended_arg=False, - ) - ) - new_tokens.append( - Token( - opname=f"BUILD_{collection_type}", - attr=t.attr, - pattr=t.pattr, - offset=t.offset, - has_arg=t.has_arg, - linestart=t.linestart, - opc=t.opc, - has_extended_arg=False, - ) - ) - return new_tokens - def ingest(self, co, classname=None, code_objects={}, show_asm=None): """ Pick out tokens from an uncompyle6 code object, and transform them, diff --git a/uncompyle6/semantics/n_actions.py b/uncompyle6/semantics/n_actions.py index 54f98aa9..d108b742 
100644 --- a/uncompyle6/semantics/n_actions.py +++ b/uncompyle6/semantics/n_actions.py @@ -274,6 +274,189 @@ class NonterminalActions: n_store_subscript = n_subscript = n_delete_subscript + def n_dict(self, node): + """ + Prettyprint a dict. + 'dict' is something like k = {'a': 1, 'b': 42}" + We will use source-code line breaks to guide us when to break. + """ + if len(node) == 1 and node[0] == "const_list": + self.preorder(node[0]) + self.prune() + return + + p = self.prec + self.prec = 100 + + self.indent_more(INDENT_PER_LEVEL) + sep = INDENT_PER_LEVEL[:-1] + if node[0] != "dict_entry": + self.write("{") + line_number = self.line_number + + if self.version >= (3, 0) and not self.is_pypy: + if node[0].kind.startswith("kvlist"): + # Python 3.5+ style key/value list in dict + kv_node = node[0] + l = list(kv_node) + length = len(l) + if kv_node[-1].kind.startswith("BUILD_MAP"): + length -= 1 + i = 0 + + # Respect line breaks from source + while i < length: + self.write(sep) + name = self.traverse(l[i], indent="") + if i > 0: + line_number = self.indent_if_source_nl( + line_number, self.indent + INDENT_PER_LEVEL[:-1] + ) + line_number = self.line_number + self.write(name, ": ") + value = self.traverse( + l[i + 1], indent=self.indent + (len(name) + 2) * " " + ) + self.write(value) + sep = ", " + if line_number != self.line_number: + sep += "\n" + self.indent + INDENT_PER_LEVEL[:-1] + line_number = self.line_number + i += 2 + pass + pass + elif len(node) > 1 and node[1].kind.startswith("kvlist"): + # Python 3.0..3.4 style key/value list in dict + kv_node = node[1] + l = list(kv_node) + if len(l) > 0 and l[0].kind == "kv3": + # Python 3.2 does this + kv_node = node[1][0] + l = list(kv_node) + i = 0 + while i < len(l): + self.write(sep) + name = self.traverse(l[i + 1], indent="") + if i > 0: + line_number = self.indent_if_source_nl( + line_number, self.indent + INDENT_PER_LEVEL[:-1] + ) + pass + line_number = self.line_number + self.write(name, ": ") + value = 
self.traverse( + l[i], indent=self.indent + (len(name) + 2) * " " + ) + self.write(value) + sep = ", " + if line_number != self.line_number: + sep += "\n" + self.indent + INDENT_PER_LEVEL[:-1] + line_number = self.line_number + else: + sep += " " + i += 3 + pass + pass + elif node[-1].kind.startswith("BUILD_CONST_KEY_MAP"): + # Python 3.6+ style const map + keys = node[-2].pattr + values = node[:-2] + # FIXME: Line numbers? + for key, value in zip(keys, values): + self.write(sep) + self.write(repr(key)) + line_number = self.line_number + self.write(":") + self.write(self.traverse(value[0])) + sep = ", " + if line_number != self.line_number: + sep += "\n" + self.indent + INDENT_PER_LEVEL[:-1] + line_number = self.line_number + else: + sep += " " + pass + pass + if sep.startswith(",\n"): + self.write(sep[1:]) + pass + elif node[0].kind.startswith("dict_entry"): + assert self.version >= (3, 5) + template = ("%C", (0, len(node[0]), ", **")) + self.template_engine(template, node[0]) + sep = "" + elif node[-1].kind.startswith("BUILD_MAP_UNPACK") or node[ + -1 + ].kind.startswith("dict_entry"): + assert self.version >= (3, 5) + # FIXME: I think we can intermingle dict_comp's with other + # dictionary kinds of things. The most common though is + # a sequence of dict_comp's + kwargs = node[-1].attr + template = ("**%C", (0, kwargs, ", **")) + self.template_engine(template, node) + sep = "" + + pass + else: + # Python 2 style kvlist. Find beginning of kvlist. 
+ indent = self.indent + " " + line_number = self.line_number + if node[0].kind.startswith("BUILD_MAP"): + if len(node) > 1 and node[1].kind in ("kvlist", "kvlist_n"): + kv_node = node[1] + else: + kv_node = node[1:] + self.kv_map(kv_node, sep, line_number, indent) + else: + sep = "" + opname = node[-1].kind + if self.is_pypy and self.version >= (3, 5): + if opname.startswith("BUILD_CONST_KEY_MAP"): + keys = node[-2].attr + # FIXME: DRY this and the above + for i in range(len(keys)): + key = keys[i] + value = self.traverse(node[i], indent="") + self.write(sep, key, ": ", value) + sep = ", " + if line_number != self.line_number: + sep += "\n" + self.indent + " " + line_number = self.line_number + pass + pass + pass + else: + if opname.startswith("kvlist"): + list_node = node[0] + else: + list_node = node + + assert list_node[-1].kind.startswith("BUILD_MAP") + for i in range(0, len(list_node) - 1, 2): + key = self.traverse(list_node[i], indent="") + value = self.traverse(list_node[i + 1], indent="") + self.write(sep, key, ": ", value) + sep = ", " + if line_number != self.line_number: + sep += "\n" + self.indent + " " + line_number = self.line_number + pass + pass + pass + elif opname.startswith("kvlist"): + kv_node = node[-1] + self.kv_map(node[-1], sep, line_number, indent) + + pass + pass + if sep.startswith(",\n"): + self.write(sep[1:]) + if node[0] != "dict_entry": + self.write("}") + self.indent_less(INDENT_PER_LEVEL) + self.prec = p + self.prune() + def n_docstring(self, node): indent = self.indent diff --git a/uncompyle6/semantics/pysource.py b/uncompyle6/semantics/pysource.py index bea3f36b..52a7014e 100644 --- a/uncompyle6/semantics/pysource.py +++ b/uncompyle6/semantics/pysource.py @@ -696,184 +696,6 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): pass pass - def n_dict(self, node): - """ - prettyprint a dict - 'dict' is something like k = {'a': 1, 'b': 42}" - We will use source-code line breaks to guide us when to 
break. - """ - p = self.prec - self.prec = 100 - - self.indent_more(INDENT_PER_LEVEL) - sep = INDENT_PER_LEVEL[:-1] - if node[0] != "dict_entry": - self.write("{") - line_number = self.line_number - - if self.version >= (3, 0) and not self.is_pypy: - if node[0].kind.startswith("kvlist"): - # Python 3.5+ style key/value list in dict - kv_node = node[0] - l = list(kv_node) - length = len(l) - if kv_node[-1].kind.startswith("BUILD_MAP"): - length -= 1 - i = 0 - - # Respect line breaks from source - while i < length: - self.write(sep) - name = self.traverse(l[i], indent="") - if i > 0: - line_number = self.indent_if_source_nl( - line_number, self.indent + INDENT_PER_LEVEL[:-1] - ) - line_number = self.line_number - self.write(name, ": ") - value = self.traverse( - l[i + 1], indent=self.indent + (len(name) + 2) * " " - ) - self.write(value) - sep = ", " - if line_number != self.line_number: - sep += "\n" + self.indent + INDENT_PER_LEVEL[:-1] - line_number = self.line_number - i += 2 - pass - pass - elif len(node) > 1 and node[1].kind.startswith("kvlist"): - # Python 3.0..3.4 style key/value list in dict - kv_node = node[1] - l = list(kv_node) - if len(l) > 0 and l[0].kind == "kv3": - # Python 3.2 does this - kv_node = node[1][0] - l = list(kv_node) - i = 0 - while i < len(l): - self.write(sep) - name = self.traverse(l[i + 1], indent="") - if i > 0: - line_number = self.indent_if_source_nl( - line_number, self.indent + INDENT_PER_LEVEL[:-1] - ) - pass - line_number = self.line_number - self.write(name, ": ") - value = self.traverse( - l[i], indent=self.indent + (len(name) + 2) * " " - ) - self.write(value) - sep = ", " - if line_number != self.line_number: - sep += "\n" + self.indent + INDENT_PER_LEVEL[:-1] - line_number = self.line_number - else: - sep += " " - i += 3 - pass - pass - elif node[-1].kind.startswith("BUILD_CONST_KEY_MAP"): - # Python 3.6+ style const map - keys = node[-2].pattr - values = node[:-2] - # FIXME: Line numbers? 
- for key, value in zip(keys, values): - self.write(sep) - self.write(repr(key)) - line_number = self.line_number - self.write(":") - self.write(self.traverse(value[0])) - sep = ", " - if line_number != self.line_number: - sep += "\n" + self.indent + INDENT_PER_LEVEL[:-1] - line_number = self.line_number - else: - sep += " " - pass - pass - if sep.startswith(",\n"): - self.write(sep[1:]) - pass - elif node[0].kind.startswith("dict_entry"): - assert self.version >= (3, 5) - template = ("%C", (0, len(node[0]), ", **")) - self.template_engine(template, node[0]) - sep = "" - elif node[-1].kind.startswith("BUILD_MAP_UNPACK") or node[ - -1 - ].kind.startswith("dict_entry"): - assert self.version >= (3, 5) - # FIXME: I think we can intermingle dict_comp's with other - # dictionary kinds of things. The most common though is - # a sequence of dict_comp's - kwargs = node[-1].attr - template = ("**%C", (0, kwargs, ", **")) - self.template_engine(template, node) - sep = "" - - pass - else: - # Python 2 style kvlist. Find beginning of kvlist. 
- indent = self.indent + " " - line_number = self.line_number - if node[0].kind.startswith("BUILD_MAP"): - if len(node) > 1 and node[1].kind in ("kvlist", "kvlist_n"): - kv_node = node[1] - else: - kv_node = node[1:] - self.kv_map(kv_node, sep, line_number, indent) - else: - sep = "" - opname = node[-1].kind - if self.is_pypy and self.version >= (3, 5): - if opname.startswith("BUILD_CONST_KEY_MAP"): - keys = node[-2].attr - # FIXME: DRY this and the above - for i in range(len(keys)): - key = keys[i] - value = self.traverse(node[i], indent="") - self.write(sep, key, ": ", value) - sep = ", " - if line_number != self.line_number: - sep += "\n" + self.indent + " " - line_number = self.line_number - pass - pass - pass - else: - if opname.startswith("kvlist"): - list_node = node[0] - else: - list_node = node - - assert list_node[-1].kind.startswith("BUILD_MAP") - for i in range(0, len(list_node) - 1, 2): - key = self.traverse(list_node[i], indent="") - value = self.traverse(list_node[i + 1], indent="") - self.write(sep, key, ": ", value) - sep = ", " - if line_number != self.line_number: - sep += "\n" + self.indent + " " - line_number = self.line_number - pass - pass - pass - elif opname.startswith("kvlist"): - kv_node = node[-1] - self.kv_map(node[-1], sep, line_number, indent) - - pass - pass - if sep.startswith(",\n"): - self.write(sep[1:]) - if node[0] != "dict_entry": - self.write("}") - self.indent_less(INDENT_PER_LEVEL) - self.prec = p - self.prune() - def template_engine(self, entry, startnode): """The format template interpetation engine. See the comment at the beginning of this module for the how we interpret format