diff --git a/PKG-INFO b/PKG-INFO index 0c06650e..25fb4faf 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -2,7 +2,7 @@ Metadata-Version: 2.0 Name: uncompyle6 Version: 2.0.1 Summary: Python byte-code to source-code converter -Home-page: http://github.com/rocky/uncompyle6 +Home-page: http://github.com/rocky/python-uncompyle6 Author: Rocky Author-email: rb@dustyfeet.com License: GPLv3 diff --git a/README.rst b/README.rst index a8e2f272..d08f136f 100644 --- a/README.rst +++ b/README.rst @@ -1,8 +1,7 @@ uncompyle6 ========== -A CPython 2.x and possibly 3.x byte-code disassembler and -adecompiler. +A Python 2.x and possibly 3.x byte-code decompiler. This is written in Python 2.7 but is Python3 compatible. @@ -10,46 +9,34 @@ This is written in Python 2.7 but is Python3 compatible. Introduction ------------ -'uncompyle6' converts Python byte-code back into equivalent Python +_uncompyle6_ converts Python byte-code back into equivalent Python source code. It accepts byte-codes from Python version 2.5 to 2.7. -It runs on Python 2.7 and, with a little more work, on Python 3 as well. +It runs on Python 2.7 and with a little more work Python 3. The generated source is fairly readable: docstrings, lists, tuples and hashes are somewhat pretty-printed. -'uncompyle6' is based on John Aycock's generic small languages -compiler 'spark' (http://pages.cpsc.ucalgary.ca/~aycock/spark/) and his +_uncompyle6_ is based on John Aycock's generic small languages +compiler 'spark' (http://www.csr.uvic.ca/~aycock/python/) and his prior work on a tool called 'decompyle'. This was improved by Hartmut Goebel -http://www.crazy-compilers.com +`http://www.crazy-compilers.com/`_ -In order to the decompile a program, we need to be able to disassemble -it first. And this process may be useful in of itself. So we provide a -utility for just that piece as well. +# Additional note (3 July 2004): -'pydisassemble' gives a CPython disassembly of Python byte-code. 
How -is this different than what Python already provides via the "dis" -module? Here, we can cross disassemble bytecodes from different -versions of CPython than the version of CPython that is doing the -disassembly. +This software is no longer available from the original website. +However http://www.crazy-compilers.com/decompyle/ provides a +decompilation service. -'pydisassemble works on the same versions as 'uncompyle6' and handles the -same sets of CPython bytecode versions. - -*Note from 3 July 2004:* - -This software was original available from http://www.crazy-compilers.com; -http://www.crazy-compilers.com/decompyle/ provides a decompilation service. - -*Note (5 June 2012):* +# Additional note (5 June 2012): The decompilation of python bytecode 2.5 & 2.6 is based on the work of Eloi Vanderbeken. bytecode is translated to a pseudo 2.7 python bytecode and then decompiled. -*Note (12 Dec 2016):* +# Additional note (12 Dec 2016): -This project will be used to deparse fragments of code inside my -trepan_ debuggers_. For that, I need to record text fragements for all +I will be using this to deparse fragments of code inside my trepan_ +debuggers_. For that, I need to record text fragments for all byte-code offsets (of interest). This purpose although largely compatible with the original intention is yet a little bit different. @@ -80,8 +67,6 @@ Installation This uses setup.py, so it follows the standard Python routine: -:: - python setup.py install # may need sudo # or if you have pyenv: python setup.py develop @@ -103,18 +88,15 @@ Usage Run -:: - ./scripts/uncompyle6 -h - for usage help Known Bugs/Restrictions ----------------------- -Support for Python 3 bytecode and syntax is lacking. +Support for Python 3 bytecode and syntax is lacking. .. _trepan: https://pypi.python.org/pypi/trepan ..
_debuggers: https://pypi.python.org/pypi/trepan3k diff --git a/bin/pydissassemble b/bin/pydissassemble index 9be906d3..e8ca97a2 100755 --- a/bin/pydissassemble +++ b/bin/pydissassemble @@ -38,11 +38,11 @@ def disassemble_code(version, co, out=None): assert isinstance(co, types.CodeType) # store final output stream for case of error - __real_out = out or sys.stdout - print('# Python %s' % version, file=__real_out) + real_out = out or sys.stdout + print('# Python %s' % version, file=real_out) if co.co_filename: print('# Embedded file name: %s' % co.co_filename, - file=__real_out) + file=real_out) # Pick up appropriate scanner if version == 2.7: @@ -63,6 +63,11 @@ def disassemble_code(version, co, out=None): scanner.setShowAsm(True, out) tokens, customize = scanner.disassemble(co) + for t in tokens: + print(t, file=real_out) + print(file=out) + + def disassemble_file(filename, outstream=None, showasm=False, showast=False): """ diff --git a/uncompyle6/__init__.py b/uncompyle6/__init__.py index 6d6a11b8..0fe86601 100644 --- a/uncompyle6/__init__.py +++ b/uncompyle6/__init__.py @@ -81,11 +81,11 @@ def load_module(filename): # print version fp.read(4) # timestamp + magic_int = magics.magic2int(magic) if version == PYTHON_VERSION: - magic_int = magics.magic2int(magic) # Note: a higher magic number necessarily mean a later - # release. At Pyton 3.0 the magic number decreased + # release. At Python 3.0 the magic number decreased # significantly. Hence the range below. Also note # inclusion of the size info, occurred within a # Python magor/minor release. 
Hence the test on the @@ -95,7 +95,7 @@ def load_module(filename): bytecode = fp.read() co = marshal.loads(bytecode) else: - co = disas.load(fp) + co = disas.load(fp, magic_int) pass return version, co @@ -108,11 +108,11 @@ def uncompyle(version, co, out=None, showasm=False, showast=False): assert isinstance(co, types.CodeType) # store final output stream for case of error - __real_out = out or sys.stdout - print('# Python %s' % version, file=__real_out) + real_out = out or sys.stdout + print('# Python %s' % version, file=real_out) if co.co_filename: print('# Embedded file name: %s' % co.co_filename, - file=__real_out) + file=real_out) # Pick up appropriate scanner if version == 2.7: @@ -133,12 +133,17 @@ def uncompyle(version, co, out=None, showasm=False, showast=False): scanner.setShowAsm(showasm, out) tokens, customize = scanner.disassemble(co) + if showasm: + for t in tokens: + print(t, file=real_out) + print(file=out) + # Build AST from disassembly. walk = walker.Walker(out, scanner, showast=showast) try: ast = walk.build_ast(tokens, customize) except walker.ParserError as e : # parser failed, dump disassembly - print(e, file=__real_out) + print(e, file=real_out) raise del tokens # save memory diff --git a/uncompyle6/disas.py b/uncompyle6/disas.py index e25ee534..74528b62 100644 --- a/uncompyle6/disas.py +++ b/uncompyle6/disas.py @@ -36,7 +36,7 @@ def marshalLoad(fp): internStrings = [] return load(fp) -def load(fp): +def load(fp, magic_int): """ marshal.load() written in Python. When the Python bytecode magic loaded is the same magic for the running Python interpreter, we can simply use the @@ -51,27 +51,34 @@ def load(fp): if marshalType == 'c': Code = types.CodeType + # FIXME If 'i' is deprecated, what would we use? 
co_argcount = unpack('i', fp.read(4))[0] co_nlocals = unpack('i', fp.read(4))[0] co_stacksize = unpack('i', fp.read(4))[0] co_flags = unpack('i', fp.read(4))[0] - co_code = load(fp) - co_consts = load(fp) - co_names = load(fp) - co_varnames = load(fp) - co_freevars = load(fp) - co_cellvars = load(fp) - co_filename = load(fp) - co_name = load(fp) + # FIXME: somewhere between Python 2.7 and python 3.2 there's + # another 4 bytes before we get to the bytecode. What's going on? + # Again, because magic ints decreased between python 2.7 and 3.0 we need + # a range here. + if 3000 < magic_int < 20121: + fp.read(4) + co_code = load(fp, magic_int) + co_consts = load(fp, magic_int) + co_names = load(fp, magic_int) + co_varnames = load(fp, magic_int) + co_freevars = load(fp, magic_int) + co_cellvars = load(fp, magic_int) + co_filename = load(fp, magic_int) + co_name = load(fp, magic_int) co_firstlineno = unpack('i', fp.read(4))[0] - co_lnotab = load(fp) + co_lnotab = load(fp, magic_int) # The Python3 code object is different than Python2's which # we are reading if we get here. # Also various parameters which were strings are now # bytes (which is probably more logical). if PYTHON3: if PYTHON_MAGIC_INT > 3020: - # In later Python3 versions, there is a + # In later Python3 magic_ints, there is a # kwonlyargcount parameter which we set to 0. 
return Code(co_argcount, 0, co_nlocals, co_stacksize, co_flags, bytes(co_code, encoding='utf-8'), @@ -152,7 +159,7 @@ def load(fp): tuplesize = unpack('i', fp.read(4))[0] ret = tuple() while tuplesize > 0: - ret += load(fp), + ret += load(fp, magic_int), tuplesize -= 1 return ret elif marshalType == '[': diff --git a/uncompyle6/scanner.py b/uncompyle6/scanner.py index 265a6bd4..5963e2c0 100755 --- a/uncompyle6/scanner.py +++ b/uncompyle6/scanner.py @@ -21,7 +21,7 @@ if (sys.version_info > (3, 0)): else: L65536 = long(65536) -from uncompyle6.opcodes import opcode_25, opcode_26, opcode_27, opcode_34 +from uncompyle6.opcodes import opcode_25, opcode_26, opcode_27, opcode_32, opcode_34 class Token: @@ -31,7 +31,7 @@ class Token: A byte-code token is equivalent to the contents of one line as output by dis.dis(). ''' - def __init__(self, type_, attr=None, pattr=None, offset=-1, linestart=False): + def __init__(self, type_, attr=None, pattr=None, offset=-1, linestart=None): self.type = intern(type_) self.attr = attr self.pattr = pattr @@ -51,9 +51,9 @@ class Token: def __str__(self): pattr = self.pattr if self.linestart: - return '\n%s\t%-17s %r' % (self.offset, self.type, pattr) + return '\n%4d %6s\t%-17s %r' % (self.linestart, self.offset, self.type, pattr) else: - return '%s\t%-17s %r' % (self.offset, self.type, pattr) + return ' %6s\t%-17s %r' % (self.offset, self.type, pattr) def __hash__(self): return hash(self.type) diff --git a/uncompyle6/scanners/scanner25.py b/uncompyle6/scanners/scanner25.py index 6ec191d4..d26e4b20 100644 --- a/uncompyle6/scanners/scanner25.py +++ b/uncompyle6/scanners/scanner25.py @@ -35,8 +35,14 @@ class Scanner25(scan.Scanner): if self.code[i] in (RETURN_VALUE, END_FINALLY): n = i + 1 self.code = array('B', co.co_code[:n]) - # linestarts contains bloc code adresse (addr,block) + + # linestarts is a tuple of (offset, line number. 
+ # Turn that into a hash that we can index self.linestarts = list(dis.findlinestarts(co)) + linestartoffsets = {} + for offset, lineno in self.linestarts: + linestartoffsets[offset] = lineno + self.prev = [0] # class and names @@ -72,7 +78,13 @@ class Scanner25(scan.Scanner): linestarts = self.linestarts self.lines = [] linetuple = namedtuple('linetuple', ['l_no', 'next']) - linestartoffsets = {a for (a, _) in linestarts} + + # linestarts is a tuple of (offset, line number). + # Turn that into a hash that we can index + linestartoffsets = {} + for offset, lineno in linestarts: + linestartoffsets[offset] = lineno + (prev_start_byte, prev_line_no) = linestarts[0] for (start_byte, line_no) in linestarts[1:]: while j < start_byte: @@ -202,16 +214,16 @@ class Scanner25(scan.Scanner): if offset in self.return_end_ifs: op_name = 'RETURN_END_IF' - if offset not in replace: - rv.append(Token(op_name, oparg, pattr, offset, linestart = offset in linestartoffsets)) + if offset in linestartoffsets: + linestart = linestartoffsets[offset] else: - rv.append(Token(replace[offset], oparg, pattr, offset, linestart = offset in linestartoffsets)) + linestart = None + + if offset not in replace: + rv.append(Token(op_name, oparg, pattr, offset, linestart)) + else: + rv.append(Token(replace[offset], oparg, pattr, offset, linestart)) - if self.showasm: - out = self.out # shortcut - for t in rv: - print >>out, t - print >>out return rv, customize def getOpcodeToDel(self, i): diff --git a/uncompyle6/scanners/scanner26.py b/uncompyle6/scanners/scanner26.py index 70c80edd..ada5d0e9 100644 --- a/uncompyle6/scanners/scanner26.py +++ b/uncompyle6/scanners/scanner26.py @@ -13,7 +13,7 @@ from operator import itemgetter from uncompyle6.opcodes.opcode_26 import * import dis -import scanner as scan +import uncompyle6.scanner as scan class Scanner26(scan.Scanner): def __init__(self): @@ -71,7 +71,13 @@ class Scanner26(scan.Scanner): linestarts = self.linestarts self.lines = [] linetuple =
namedtuple('linetuple', ['l_no', 'next']) - linestartoffsets = {a for (a, _) in linestarts} + + # linestarts is a tuple of (offset, line number). + # Turn that into a hash that we can index + linestartoffsets = {} + for offset, lineno in linestarts: + linestartoffsets[offset] = lineno + (prev_start_byte, prev_line_no) = linestarts[0] for (start_byte, line_no) in linestarts[1:]: while j < start_byte: @@ -202,16 +208,16 @@ class Scanner26(scan.Scanner): if offset in self.return_end_ifs: op_name = 'RETURN_END_IF' - if offset not in replace: - rv.append(Token(op_name, oparg, pattr, offset, linestart = offset in linestartoffsets)) + if offset in linestartoffsets: + linestart = linestartoffsets[offset] else: - rv.append(Token(replace[offset], oparg, pattr, offset, linestart = offset in linestartoffsets)) + linestart = None + + if offset not in replace: + rv.append(Token(op_name, oparg, pattr, offset, linestart)) + else: + rv.append(Token(replace[offset], oparg, pattr, offset, linestart)) - if self.showasm: - out = self.out # shortcut - for t in rv: - print >>out, t - print >>out return rv, customize def getOpcodeToDel(self, i): diff --git a/uncompyle6/scanners/scanner27.py b/uncompyle6/scanners/scanner27.py index f3076e2c..a6aff588 100644 --- a/uncompyle6/scanners/scanner27.py +++ b/uncompyle6/scanners/scanner27.py @@ -46,10 +46,16 @@ class Scanner27(scan.Scanner): self.lines = [] linetuple = namedtuple('linetuple', ['l_no', 'next']) + j = 0 - # linestarts contains bloc code adresse (addr,block) + + # linestarts is a tuple of (offset, line number).
+ # Turn that into a hash that we can index linestarts = list(dis.findlinestarts(co)) - linestartoffsets = {a for (a, _) in linestarts} + linestartoffsets = {} + for offset, lineno in linestarts: + linestartoffsets[offset] = lineno + (prev_start_byte, prev_line_no) = linestarts[0] for (start_byte, line_no) in linestarts[1:]: while j < start_byte: @@ -190,16 +196,16 @@ class Scanner27(scan.Scanner): if offset in self.return_end_ifs: op_name = 'RETURN_END_IF' - if offset not in replace: - rv.append(Token(op_name, oparg, pattr, offset, linestart = offset in linestartoffsets)) + if offset in linestartoffsets: + linestart = linestartoffsets[offset] else: - rv.append(Token(replace[offset], oparg, pattr, offset, linestart = offset in linestartoffsets)) + linestart = None + + if offset not in replace: + rv.append(Token(op_name, oparg, pattr, offset, linestart)) + else: + rv.append(Token(replace[offset], oparg, pattr, offset, linestart)) - if self.showasm: - out = self.out # shortcut - for t in rv: - print(t, file=out) - print(file=out) return rv, customize def op_size(self, op): diff --git a/uncompyle6/scanners/scanner32.py b/uncompyle6/scanners/scanner32.py index 265ddebe..d6108848 100644 --- a/uncompyle6/scanners/scanner32.py +++ b/uncompyle6/scanners/scanner32.py @@ -11,7 +11,7 @@ from __future__ import print_function import dis, marshal from collections import namedtuple -from uncompyle6.scanner import Token +from uncompyle6.scanner import Token, L65536 # Get all the opcodes into globals @@ -20,7 +20,7 @@ from uncompyle6.opcodes.opcode_27 import * import uncompyle6.scanner as scan -class Scanner34(scan.Scanner): +class Scanner32(scan.Scanner): def __init__(self): self.Token = scan.Scanner.__init__(self, 3.2) # check @@ -62,14 +62,19 @@ class Scanner34(scan.Scanner): # w/o touching arguments current_token = Token(dis.opname[op]) current_token.offset = offset - current_token.linestart = True if offset in self.linestarts else False + + if offset in self.linestarts: +
current_token.linestart = self.linestarts[offset] + else: + current_token.linestart = None + if op >= dis.HAVE_ARGUMENT: # Calculate op's argument value based on its argument and # preceding extended argument, if any oparg = code[offset+1] + code[offset+2]*256 + extended_arg extended_arg = 0 if op == dis.EXTENDED_ARG: - extended_arg = oparg*65536 + extended_arg = oparg * L65536 # Fill token's attr/pattr fields current_token.attr = oparg @@ -88,6 +93,7 @@ class Scanner34(scan.Scanner): free = co.co_cellvars + co.co_freevars current_token.pattr = free[oparg] tokens.append(current_token) + return tokens, customize def build_lines_data(self, code_obj): diff --git a/uncompyle6/scanners/scanner34.py b/uncompyle6/scanners/scanner34.py index 5c26f170..70dfe627 100644 --- a/uncompyle6/scanners/scanner34.py +++ b/uncompyle6/scanners/scanner34.py @@ -11,8 +11,7 @@ from __future__ import print_function import dis, marshal from collections import namedtuple -from uncompyle6.scanner import Token - +from uncompyle6.scanner import Token, L65536 # Get all the opcodes into globals globals().update(dis.opmap) @@ -62,14 +61,19 @@ class Scanner34(scan.Scanner): # w/o touching arguments current_token = Token(dis.opname[op]) current_token.offset = offset - current_token.linestart = True if offset in self.linestarts else False + + if offset in self.linestarts: + current_token.linestart = self.linestarts[offset] + else: + current_token.linestart = None + if op >= dis.HAVE_ARGUMENT: # Calculate op's argument value based on its argument and # preceding extended argument, if any oparg = code[offset+1] + code[offset+2]*256 + extended_arg extended_arg = 0 if op == dis.EXTENDED_ARG: - extended_arg = oparg*65536 + extended_arg = oparg * L65536 # Fill token's attr/pattr fields current_token.attr = oparg