diff --git a/test/bytecode_2.7_run/05_long_literals.pyc b/test/bytecode_2.7_run/05_long_literals.pyc new file mode 100644 index 00000000..f2e2a5bd Binary files /dev/null and b/test/bytecode_2.7_run/05_long_literals.pyc differ diff --git a/uncompyle6/parsers/parse2.py b/uncompyle6/parsers/parse2.py index fc676883..3ce9c9fe 100644 --- a/uncompyle6/parsers/parse2.py +++ b/uncompyle6/parsers/parse2.py @@ -312,6 +312,14 @@ class Python2Parser(PythonParser): opname_base = opname[: opname.rfind("_")] + if opname in ("BUILD_CONST_LIST", "BUILD_CONST_SET"): + rule = """ + add_consts ::= ADD_VALUE* + const_list ::= COLLECTION_START add_consts %s + expr ::= const_list + """ % opname + self.addRule(rule, nop_func) + # The order of opname listed is roughly sorted below if opname_base in ("BUILD_LIST", "BUILD_SET", "BUILD_TUPLE"): # We do this complicated test to speed up parsing of diff --git a/uncompyle6/parsers/parse3.py b/uncompyle6/parsers/parse3.py index 19f84e71..5e3c74f2 100644 --- a/uncompyle6/parsers/parse3.py +++ b/uncompyle6/parsers/parse3.py @@ -748,18 +748,37 @@ class Python3Parser(PythonParser): kvlist_n = "expr " * (token.attr) rule = "dict ::= %sLOAD_CONST %s" % (kvlist_n, opname) self.addRule(rule, nop_func) + + elif opname in ("BUILD_CONST_LIST", "BUILD_CONST_DICT", "BUILD_CONST_SET"): + if opname == "BUILD_CONST_DICT": + rule = """ + add_consts ::= ADD_VALUE* + const_list ::= COLLECTION_START add_consts %s + dict ::= const_list + expr ::= dict + """ % opname + else: + rule = """ + add_consts ::= ADD_VALUE* + const_list ::= COLLECTION_START add_consts %s + expr ::= const_list + """ % opname + self.addRule(rule, nop_func) + elif opname.startswith("BUILD_DICT_OLDER"): rule = """dict ::= COLLECTION_START key_value_pairs BUILD_DICT_OLDER key_value_pairs ::= key_value_pair+ key_value_pair ::= ADD_KEY ADD_VALUE """ self.addRule(rule, nop_func) + elif opname.startswith("BUILD_LIST_UNPACK"): v = token.attr rule = "build_list_unpack ::= %s%s" % ("expr " * v, opname) 
self.addRule(rule, nop_func) rule = "expr ::= build_list_unpack" self.addRule(rule, nop_func) + elif opname_base in ("BUILD_MAP", "BUILD_MAP_UNPACK"): kvlist_n = "kvlist_%s" % token.attr if opname == "BUILD_MAP_n": diff --git a/uncompyle6/scanner.py b/uncompyle6/scanner.py index 7ef09b6b..decf03d6 100644 --- a/uncompyle6/scanner.py +++ b/uncompyle6/scanner.py @@ -1,4 +1,4 @@ -# Copyright (c) 2016, 2018-2021 by Rocky Bernstein +# Copyright (c) 2016, 2018-2022 by Rocky Bernstein # Copyright (c) 2005 by Dan Pascu # Copyright (c) 2000-2002 by hartmut Goebel # Copyright (c) 1999 John Aycock @@ -24,7 +24,6 @@ scanners, e.g. for Python 2.7 or 3.4. from typing import Optional from array import array from collections import namedtuple -from sys import intern # noqa from uncompyle6.scanners.tok import Token from xdis.version_info import IS_PYPY, version_tuple_to_str @@ -125,6 +124,80 @@ class Scanner(object): # FIXME: This weird Python2 behavior is not Python3 self.resetTokenClass() + def bound_collection_from_tokens( + self, tokens, t, i, collection_type + ): + count = t.attr + assert isinstance(count, int) + + assert count <= i + + if collection_type == "CONST_DICT": + # constant dictionaries work via BUILD_CONST_KEY_MAP and + # handle the values() like sets and lists. + # However the keys() are a LOAD_CONST of the keys. 
+ # adjust offset to account for this + count += 1 + + # For small lists don't bother + if count < 5: + return None + + collection_start = i - count + + for j in range(collection_start, i): + if tokens[j].kind not in ( + "LOAD_CONST", + "LOAD_FAST", + "LOAD_GLOBAL", + "LOAD_NAME", + ): + return None + + collection_enum = CONST_COLLECTIONS.index(collection_type) + + # If we get here all instructions before tokens[i] are LOAD_CONST and we can replace + add a boundary marker and change LOAD_CONST to something else + new_tokens = tokens[:-count] + start_offset = tokens[collection_start].offset + new_tokens.append( + Token( + opname="COLLECTION_START", + attr=collection_enum, + pattr=collection_type, + offset="%s_0" % start_offset, + has_arg=True, + opc=self.opc, + has_extended_arg=False, + ) + ) + for j in range(collection_start, i): + new_tokens.append( + Token( + opname="ADD_VALUE", + attr=tokens[j].attr, + pattr=tokens[j].pattr, + offset=tokens[j].offset, + has_arg=True, + linestart=tokens[j].linestart, + opc=self.opc, + has_extended_arg=False, + ) + ) + new_tokens.append( + Token( + opname="BUILD_%s" % collection_type, + attr=t.attr, + pattr=t.pattr, + offset=t.offset, + has_arg=t.has_arg, + linestart=t.linestart, + opc=t.opc, + has_extended_arg=False, + ) + ) + return new_tokens + def build_instructions(self, co): """ Create a list of instructions (a structured object rather than diff --git a/uncompyle6/scanners/scanner2.py b/uncompyle6/scanners/scanner2.py index 5a0b8090..db213bab 100644 --- a/uncompyle6/scanners/scanner2.py +++ b/uncompyle6/scanners/scanner2.py @@ -200,7 +200,6 @@ class Scanner2(Scanner): grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST cause specific rules for the specific number of arguments they take. 
""" - if not show_asm: show_asm = self.show_asm @@ -212,7 +211,7 @@ class Scanner2(Scanner): print(instr.disassemble()) # list of tokens/instructions - tokens = [] + new_tokens = [] # "customize" is in the process of going away here customize = {} @@ -289,7 +288,7 @@ class Scanner2(Scanner): if come_from_type not in ("LOOP", "EXCEPT"): come_from_name = "COME_FROM_%s" % come_from_type pass - tokens.append( + new_tokens.append( Token( come_from_name, jump_offset, @@ -313,6 +312,24 @@ class Scanner2(Scanner): if op == self.opc.EXTENDED_ARG: extended_arg += self.extended_arg_val(oparg) continue + + # Note: name used to match on rather than op since + # BUILD_SET isn't in earlier Pythons. + if op_name in ( + "BUILD_LIST", + "BUILD_SET", + ): + t = Token( + op_name, oparg, pattr, offset, self.linestarts.get(offset, None), op, has_arg, self.opc + ) + collection_type = op_name.split("_")[1] + next_tokens = self.bound_collection_from_tokens( + new_tokens, t, len(new_tokens), "CONST_%s" % collection_type + ) + if next_tokens is not None: + new_tokens = next_tokens + continue + if op in self.opc.CONST_OPS: const = co.co_consts[oparg] if iscode(const): @@ -347,12 +364,12 @@ class Scanner2(Scanner): elif op in self.opc.JREL_OPS: # use instead: hasattr(self, 'patch_continue'): ? if self.version[:2] == (2, 7): - self.patch_continue(tokens, offset, op) + self.patch_continue(new_tokens, offset, op) pattr = repr(offset + 3 + oparg) elif op in self.opc.JABS_OPS: # use instead: hasattr(self, 'patch_continue'): ? 
if self.version[:2] == (2, 7): - self.patch_continue(tokens, offset, op) + self.patch_continue(new_tokens, offset, op) pattr = repr(oparg) elif op in self.opc.LOCAL_OPS: pattr = varnames[oparg] @@ -433,13 +450,13 @@ class Scanner2(Scanner): linestart = self.linestarts.get(offset, None) if offset not in replace: - tokens.append( + new_tokens.append( Token( op_name, oparg, pattr, offset, linestart, op, has_arg, self.opc ) ) else: - tokens.append( + new_tokens.append( Token( replace[offset], oparg, @@ -455,10 +472,10 @@ class Scanner2(Scanner): pass if show_asm in ("both", "after"): - for t in tokens: + for t in new_tokens: print(t.format(line_prefix="")) print() - return tokens, customize + return new_tokens, customize def build_statement_indices(self): code = self.code diff --git a/uncompyle6/scanners/scanner26.py b/uncompyle6/scanners/scanner26.py index a24fccc1..32fd2386 100755 --- a/uncompyle6/scanners/scanner26.py +++ b/uncompyle6/scanners/scanner26.py @@ -123,7 +123,9 @@ class Scanner26(scan.Scanner2): i = self.next_stmt[i] extended_arg = 0 + i = -1 for offset in self.op_range(0, codelen): + i += 1 op = self.code[offset] op_name = self.opname[op] oparg = None; pattr = None @@ -156,8 +158,28 @@ class Scanner26(scan.Scanner2): oparg = self.get_argument(offset) + extended_arg extended_arg = 0 if op == self.opc.EXTENDED_ARG: - extended_arg = oparg * L65536 - continue + extended_arg += self.extended_arg_val(oparg) + continue + + + # Note: name used to match on rather than op since + # BUILD_SET isn't in earlier Pythons. 
+ if op_name in ( + "BUILD_LIST", + "BUILD_SET", + ): + t = Token( + op_name, oparg, pattr, offset, self.linestarts.get(offset, None), op, has_arg, self.opc + ) + + collection_type = op_name.split("_")[1] + next_tokens = self.bound_collection_from_tokens( + tokens, t, i, "CONST_%s" % collection_type + ) + if next_tokens is not None: + tokens = next_tokens + continue + if op in self.opc.CONST_OPS: const = co.co_consts[oparg] # We can't use inspect.iscode() because we may be diff --git a/uncompyle6/scanners/scanner37.py b/uncompyle6/scanners/scanner37.py index 464669c9..2d9e8eca 100644 --- a/uncompyle6/scanners/scanner37.py +++ b/uncompyle6/scanners/scanner37.py @@ -24,8 +24,7 @@ scanner routine for Python 3. from typing import Tuple -from uncompyle6.scanner import CONST_COLLECTIONS -from uncompyle6.scanners.tok import Token +from uncompyle6.scanner import CONST_COLLECTIONS, Token from uncompyle6.scanners.scanner37base import Scanner37Base # bytecode verification, verify(), uses JUMP_OPs from here