diff --git a/test/bytecode_3.2/05_list_comprehension.pyc b/test/bytecode_3.2/05_list_comprehension.pyc new file mode 100644 index 00000000..5f4b356e Binary files /dev/null and b/test/bytecode_3.2/05_list_comprehension.pyc differ diff --git a/uncompyle6/load.py b/uncompyle6/load.py index b77d751f..f11beaf2 100644 --- a/uncompyle6/load.py +++ b/uncompyle6/load.py @@ -52,7 +52,7 @@ def load_file(filename): fp.close() return co -def load_module(filename): +def load_module(filename, code_objects={}): """ load a module without importing it. load_module(filename: string): version, magic_int, code_object @@ -100,7 +100,7 @@ def load_module(filename): bytecode = fp.read() co = marshal.loads(bytecode) else: - co = uncompyle6.marsh.load_code(fp, magic_int) + co = uncompyle6.marsh.load_code(fp, magic_int, code_objects) pass return version, timestamp, magic_int, co diff --git a/uncompyle6/main.py b/uncompyle6/main.py index 3a2017b9..c0795b35 100644 --- a/uncompyle6/main.py +++ b/uncompyle6/main.py @@ -8,7 +8,7 @@ from uncompyle6.semantics import pysource from uncompyle6.load import load_module def uncompyle(version, co, out=None, showasm=False, showast=False, - timestamp=None, showgrammar=False): + timestamp=None, showgrammar=False, code_objects={}): """ disassembles and deparses a given code block 'co' """ @@ -26,7 +26,8 @@ def uncompyle(version, co, out=None, showasm=False, showast=False, file=real_out) try: - pysource.deparse_code(version, co, out, showasm, showast, showgrammar) + pysource.deparse_code(version, co, out, showasm, showast, showgrammar, + code_objects=code_objects) except pysource.WalkerError as e: # deparsing failed if real_out != out: @@ -40,15 +41,17 @@ def uncompyle_file(filename, outstream=None, showasm=False, showast=False, """ filename = check_object_path(filename) - version, timestamp, magic_int, co = load_module(filename) + code_objects = {} + version, timestamp, magic_int, co = load_module(filename, code_objects) + if type(co) == list: for con in co: 
uncompyle(version, con, outstream, showasm, showast, - timestamp, showgrammar) + timestamp, showgrammar, code_objects=code_objects) else: uncompyle(version, co, outstream, showasm, showast, - timestamp, showgrammar) + timestamp, showgrammar, code_objects=code_objects) co = None def main(in_base, out_base, files, codes, outfile=None, diff --git a/uncompyle6/marsh.py b/uncompyle6/marsh.py index 79772e5d..92256ffa 100644 --- a/uncompyle6/marsh.py +++ b/uncompyle6/marsh.py @@ -30,7 +30,7 @@ if PYTHON3: def compat_str(s): return s.decode('utf-8', errors='ignore') if PYTHON3 else str(s) -def load_code(fp, magic_int): +def load_code(fp, magic_int, code_objects={}): """ marshal.load() written in Python. When the Python bytecode magic loaded is the same magic for the running Python interpreter, we can simply use the @@ -47,9 +47,9 @@ def load_code(fp, magic_int): raise TypeError("File %s doesn't smell like Python bytecode" % fp.name) fp.seek(seek_pos) - return load_code_internal(fp, magic_int) + return load_code_internal(fp, magic_int, code_objects=code_objects) -def load_code_internal(fp, magic_int, bytes_for_s=False): +def load_code_internal(fp, magic_int, bytes_for_s=False, code_objects={}): global internStrings b1 = fp.read(1) @@ -69,16 +69,17 @@ def load_code_internal(fp, magic_int, bytes_for_s=False): if 3000 < magic_int < 20121: fp.read(4) - co_code = load_code_internal(fp, magic_int, bytes_for_s=True) - co_consts = load_code_internal(fp, magic_int) - co_names = load_code_internal(fp, magic_int) - co_varnames = load_code_internal(fp, magic_int) - co_freevars = load_code_internal(fp, magic_int) - co_cellvars = load_code_internal(fp, magic_int) - co_filename = load_code_internal(fp, magic_int) + co_code = load_code_internal(fp, magic_int, bytes_for_s=True, + code_objects=code_objects) + co_consts = load_code_internal(fp, magic_int, code_objects=code_objects) + co_names = load_code_internal(fp, magic_int, code_objects=code_objects) + co_varnames = load_code_internal(fp, 
magic_int, code_objects=code_objects) + co_freevars = load_code_internal(fp, magic_int, code_objects=code_objects) + co_cellvars = load_code_internal(fp, magic_int, code_objects=code_objects) + co_filename = load_code_internal(fp, magic_int, code_objects=code_objects) co_name = load_code_internal(fp, magic_int) co_firstlineno = unpack('i', fp.read(4))[0] - co_lnotab = load_code_internal(fp, magic_int) + co_lnotab = load_code_internal(fp, magic_int, code_objects=code_objects) # The Python3 code object is different than Python2's which # we are reading if we get here. # Also various parameters which were strings are now @@ -87,13 +88,13 @@ def load_code_internal(fp, magic_int, bytes_for_s=False): if PYTHON_MAGIC_INT > 3020: # In later Python3 magic_ints, there is a # kwonlyargcount parameter which we set to 0. - return Code(co_argcount, 0, co_nlocals, co_stacksize, co_flags, + code = Code(co_argcount, 0, co_nlocals, co_stacksize, co_flags, co_code, co_consts, co_names, co_varnames, co_filename, co_name, co_firstlineno, bytes(co_lnotab, encoding='utf-8'), co_freevars, co_cellvars) else: - return Code(co_argcount, 0, co_nlocals, co_stacksize, co_flags, + code = Code(co_argcount, 0, co_nlocals, co_stacksize, co_flags, co_code, co_consts, co_names, co_varnames, co_filename, co_name, co_firstlineno, bytes(co_lnotab, encoding='utf-8'), @@ -107,9 +108,11 @@ def load_code_internal(fp, magic_int, bytes_for_s=False): co_varnames = tuple([str(s) if s else None for s in co_varnames]) co_filename = str(co_filename) co_name = str(co_name) - return Code(co_argcount, co_nlocals, co_stacksize, co_flags, co_code, + code = Code(co_argcount, co_nlocals, co_stacksize, co_flags, co_code, co_consts, co_names, co_varnames, co_filename, co_name, co_firstlineno, co_lnotab, co_freevars, co_cellvars) + code_objects[str(code)] = code + return code # const type elif marshalType == '.': @@ -177,7 +180,7 @@ def load_code_internal(fp, magic_int, bytes_for_s=False): tuplesize = unpack('i', 
fp.read(4))[0] ret = tuple() while tuplesize > 0: - ret += load_code_internal(fp, magic_int), + ret += load_code_internal(fp, magic_int, code_objects=code_objects), tuplesize -= 1 return ret elif marshalType == '[': diff --git a/uncompyle6/scanner.py b/uncompyle6/scanner.py index 52047074..b156b4f1 100755 --- a/uncompyle6/scanner.py +++ b/uncompyle6/scanner.py @@ -32,14 +32,6 @@ else: from uncompyle6.opcodes import opcode_25, opcode_26, opcode_27, opcode_32, opcode_33, opcode_34 -class GenericPythonCode: - ''' - Class for representing code-like objects across different versions of - Python. - ''' - def __init__(self): - return - class Code: ''' Class for representing code-objects. @@ -321,7 +313,7 @@ if __name__ == "__main__": import inspect, uncompyle6 co = inspect.currentframe().f_code scanner = get_scanner(uncompyle6.PYTHON_VERSION) - tokens, customize = scanner.disassemble(co) + tokens, customize = scanner.disassemble(co, {}) print('-' * 30) for t in tokens: print(t) diff --git a/uncompyle6/scanners/scanner25.py b/uncompyle6/scanners/scanner25.py index 2e0a0efd..499e89ac 100755 --- a/uncompyle6/scanners/scanner25.py +++ b/uncompyle6/scanners/scanner25.py @@ -23,7 +23,7 @@ class Scanner25(scan.Scanner): def __init__(self): scan.Scanner.__init__(self, 2.5) # check - def disassemble(self, co, classname=None): + def disassemble(self, co, classname=None, code_objects={}): ''' Disassemble a code object, returning a list of 'Token'. diff --git a/uncompyle6/scanners/scanner26.py b/uncompyle6/scanners/scanner26.py index fecf951e..d87ae76c 100755 --- a/uncompyle6/scanners/scanner26.py +++ b/uncompyle6/scanners/scanner26.py @@ -22,7 +22,7 @@ class Scanner26(scan.Scanner): def __init__(self): scan.Scanner.__init__(self, 2.5) # check - def disassemble(self, co, classname=None): + def disassemble(self, co, classname=None, code_objects={}): ''' Disassemble a code object, returning a list of 'Token'. 
diff --git a/uncompyle6/scanners/scanner27.py b/uncompyle6/scanners/scanner27.py index 42584b0a..fb4c642a 100755 --- a/uncompyle6/scanners/scanner27.py +++ b/uncompyle6/scanners/scanner27.py @@ -24,7 +24,7 @@ class Scanner27(scan.Scanner): def __init__(self): scan.Scanner.__init__(self, 2.7) # check - def disassemble(self, co, classname=None): + def disassemble(self, co, classname=None, code_objects={}): """ Disassemble a code object, returning a list of 'Token'. The main part of this procedure is modelled after @@ -341,7 +341,7 @@ class Scanner27(scan.Scanner): def detect_structure(self, pos, op=None): ''' - Detect type of block structures and their boundaries to fix optimizied jumps + Detect type of block structures and their boundaries to fix optimized jumps in python2.3+ ''' diff --git a/uncompyle6/scanners/scanner3.py b/uncompyle6/scanners/scanner3.py index 89ea3533..b937f441 100644 --- a/uncompyle6/scanners/scanner3.py +++ b/uncompyle6/scanners/scanner3.py @@ -1,6 +1,6 @@ # Copyright (c) 2015 by Rocky Bernstein """ -Python 3 Generic ytecode scanner/deparser +Python 3 Generic bytecode scanner/deparser This overlaps various Python3's dis module, but it can be run from Python 2 and other versions of Python. Also, we save token information @@ -9,7 +9,7 @@ for later use in deparsing. from __future__ import print_function -import dis, re +import dis from collections import namedtuple from array import array @@ -28,7 +28,7 @@ class Scanner3(scan.Scanner): def __init__(self): scan.Scanner.__init__(self, PYTHON_VERSION) - def disassemble_generic(self, co, classname=None): + def disassemble_generic(self, co, classname=None, code_objects={}): """ Convert code object into a sequence of tokens. 
@@ -41,6 +41,7 @@ class Scanner3(scan.Scanner): codelen = len(code) self.build_lines_data(co) self.build_prev_op() + self.code_objects = code_objects # self.lines contains (block,addrLastInstr) if classname: @@ -117,18 +118,12 @@ class Scanner3(scan.Scanner): if op in hasconst: const = co.co_consts[oparg] if not PYTHON3 and isinstance(const, str): - m = re.search('^', const) - if m: - const = scan.GenericPythonCode() - const.co_name = m.group(1) - const.co_filenaame = m.group(3) - const.co_firstlineno = m.group(4) - pass - # We can't use inspect.iscode() because we may be + if const in code_objects: + const = code_objects[const] + # Not sure if we can use inspect.iscode() because we may be # using a different version of Python than the - # one that this was byte-compiled on. So the code - # types may mismatch. + # one that this was byte-compiled on. Is probably okay, + # but we'll use hasattr instead here. if hasattr(const, 'co_name'): oparg = const if const.co_name == '': @@ -425,11 +420,15 @@ class Scanner3(scan.Scanner): def detect_structure(self, offset): """ - Detect structures and their boundaries to fix optimizied jumps + Detect structures and their boundaries to fix optimized jumps in python2.3+ """ + + # TODO: check the struct boundaries more precisely -Dan + code = self.code op = code[offset] + # Detect parent structure parent = self.structs[0] start = parent['start'] diff --git a/uncompyle6/scanners/scanner32.py b/uncompyle6/scanners/scanner32.py index f197015e..44279339 100644 --- a/uncompyle6/scanners/scanner32.py +++ b/uncompyle6/scanners/scanner32.py @@ -17,8 +17,8 @@ JUMP_OPs = uncompyle6.opcodes.opcode_34.JUMP_OPs class Scanner32(scan3.Scanner3): - def disassemble(self, co, classname=None): - return self.disassemble_generic(co, classname) + def disassemble(self, co, classname=None, code_objects={}): + return self.disassemble_generic(co, classname, code_objects=code_objects) if __name__ == "__main__": import inspect diff --git 
a/uncompyle6/scanners/scanner33.py b/uncompyle6/scanners/scanner33.py index 0c1f3de6..67cd5912 100644 --- a/uncompyle6/scanners/scanner33.py +++ b/uncompyle6/scanners/scanner33.py @@ -17,8 +17,8 @@ JUMP_OPs = uncompyle6.opcodes.opcode_33.JUMP_OPs class Scanner33(scan3.Scanner3): - def disassemble(self, co, classname=None): - return self.disassemble_generic(co, classname) + def disassemble(self, co, classname=None, code_objects={}): + return self.disassemble_generic(co, classname, code_objects=code_objects) if __name__ == "__main__": import inspect diff --git a/uncompyle6/scanners/scanner34.py b/uncompyle6/scanners/scanner34.py index a5a22a94..86b63dc6 100644 --- a/uncompyle6/scanners/scanner34.py +++ b/uncompyle6/scanners/scanner34.py @@ -28,12 +28,13 @@ from uncompyle6.opcodes.opcode_34 import * class Scanner34(scan3.Scanner3): - def disassemble(self, co, classname=None): + def disassemble(self, co, classname=None, code_objects={}): fn = self.disassemble_built_in if PYTHON_VERSION == 3.4 \ else self.disassemble_generic - return fn(co, classname) + return fn(co, classname, code_objects=code_objects) - def disassemble_built_in(self, co, classname=None): + def disassemble_built_in(self, co, classname=None, + code_objects={}): # Container for tokens tokens = [] customize = {} diff --git a/uncompyle6/semantics/pysource.py b/uncompyle6/semantics/pysource.py index 85359934..4841599c 100644 --- a/uncompyle6/semantics/pysource.py +++ b/uncompyle6/semantics/pysource.py @@ -67,7 +67,7 @@ from uncompyle6 import PYTHON3 from uncompyle6.parser import get_python_parser from uncompyle6.parsers.astnode import AST from uncompyle6.parsers.spark import GenericASTTraversal, DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG -from uncompyle6.scanner import Code, GenericPythonCode, get_scanner +from uncompyle6.scanner import Code, get_scanner from uncompyle6.scanners.tok import Token, NoneToken import uncompyle6.parser as python_parser @@ -981,10 +981,6 @@ class Walker(GenericASTTraversal, object): 
self.prec = 27 code = node[code_index].attr - if isinstance(code, GenericPythonCode): - self.write(' for i_am in ["Python 2-3 deparsing limitation"]') - return - assert inspect.iscode(code) code = Code(code, self.scanner, self.currentclass) @@ -1031,10 +1027,6 @@ class Walker(GenericASTTraversal, object): self.prec = 27 code = node[code_index].attr - if isinstance(code, GenericPythonCode): - self.write(' for i_am in ["Python 2-3 deparsing limitation"]') - return - assert inspect.iscode(code) code = Code(code, self.scanner, self.currentclass) # assert isinstance(code, Code) @@ -1454,10 +1446,6 @@ class Walker(GenericASTTraversal, object): defparams = node[:node[-1].attr] code = node[code_index].attr - if isinstance(code, GenericPythonCode): - self.write('(limitation="Cross Python 2/3 deparsing")') - return - assert inspect.iscode(code) code = Code(code, self.scanner, self.currentclass) # assert isinstance(code, Code) @@ -1631,7 +1619,7 @@ class Walker(GenericASTTraversal, object): return ast def deparse_code(version, co, out=sys.stdout, showasm=False, showast=False, - showgrammar=False): + showgrammar=False, code_objects={}): """ disassembles and deparses a given code block 'co' """ @@ -1640,7 +1628,7 @@ def deparse_code(version, co, out=sys.stdout, showasm=False, showast=False, # store final output stream for case of error scanner = get_scanner(version) - tokens, customize = scanner.disassemble(co) + tokens, customize = scanner.disassemble(co, code_objects=code_objects) if showasm: for t in tokens: print(t)