diff --git a/Makefile b/Makefile index bed6ab6d..e4a5aeb5 100644 --- a/Makefile +++ b/Makefile @@ -62,7 +62,7 @@ DISTCLEAN_FILES = build dist *.pyc #: Remove ALL derived files distclean: clean - -rm -fr $(DISTCLEAN_FILES) || true + -rm -fvr $(DISTCLEAN_FILES) || true -find . -name \*.pyc -exec rm -v {} \; -find . -name \*.egg-info -exec rm -vr {} \; diff --git a/README.rst b/README.rst index f1a4ec4b..a8e2f272 100644 --- a/README.rst +++ b/README.rst @@ -1,7 +1,8 @@ uncompyle6 ========== -A Python 2.x and possibly 3.x byte-code decompiler. +A CPython 2.x and possibly 3.x byte-code disassembler and +decompiler. This is written in Python 2.7 but is Python3 compatible. @@ -21,22 +22,34 @@ compiler 'spark' (http://pages.cpsc.ucalgary.ca/~aycock/spark/) and his prior work on a tool called 'decompyle'. This was improved by Hartmut Goebel http://www.crazy-compilers.com -*Additional note (3 July 2004):* +In order to decompile a program, we need to be able to disassemble +it first. And this process may be useful in and of itself. So we provide a +utility for just that piece as well. -This software is no longer available from the original website. -However http://www.crazy-compilers.com/decompyle/ provides a -decompilation service. +'pydisassemble' gives a CPython disassembly of Python byte-code. How +is this different than what Python already provides via the "dis" +module? Here, we can cross disassemble bytecodes from different +versions of CPython than the version of CPython that is doing the +disassembly. -*Additional note (5 June 2012):* +'pydisassemble' works on the same versions as 'uncompyle6' and handles the +same sets of CPython bytecode versions. + +*Note from 3 July 2004:* + +This software was originally available from http://www.crazy-compilers.com; +http://www.crazy-compilers.com/decompyle/ provides a decompilation service. + +*Note (5 June 2012):* The decompilation of python bytecode 2.5 & 2.6 is based on the work of Eloi Vanderbeken.
bytecode is translated to a pseudo 2.7 python bytecode and then decompiled. -*Additional note (12 Dec 2016):* +*Note (12 Dec 2016):* -This will be used to deparse fragments of code inside my trepan_ -debuggers_. For that, I need to record text fragements for all +This project will be used to deparse fragments of code inside my +trepan_ debuggers_. For that, I need to record text fragments for all byte-code offsets (of interest). This purpose although largely compatible with the original intention is yet a little bit different.
diff --git a/scripts/pydissassemble b/scripts/pydissassemble
new file mode 100755
index 00000000..9be906d3
--- /dev/null
+++ b/scripts/pydissassemble
@@ -0,0 +1,216 @@
+#!/usr/bin/env python
+# Mode: -*- python -*-
+#
+# Copyright (c) 2015 by Rocky Bernstein
+#
+"""
+Usage: pydisassemble [OPTIONS]... FILE...
+
+Examples:
+  pydisassemble foo.pyc
+  pydisassemble foo.py
+  pydisassemble -o foo.pydis foo.pyc
+  pydisassemble -o /tmp foo.pyc
+
+Options:
+  -o PATH       output disassembled files to this path:
+                if multiple input files are disassembled, the common prefix
+                is stripped from these names and the remainder appended to
+                PATH
+  --help        show this message
+
+"""
+
+from __future__ import print_function
+
+import sys, os, getopt, time, types
+import importlib
+import os.path
+import uncompyle6
+
+# Short usage text; lists only the options getopt is asked to parse below.
+Usage_short = "%s [--help] [-o PATH] FILE..." % os.path.basename(sys.argv[0])
+
+# Bytecode version -> (scanner module, scanner class) used to disassemble it.
+SCANNERS = {
+    2.5: ('uncompyle6.scanners.scanner25', 'Scanner25'),
+    2.6: ('uncompyle6.scanners.scanner26', 'Scanner26'),
+    2.7: ('uncompyle6.scanners.scanner27', 'Scanner27'),
+    3.2: ('uncompyle6.scanners.scanner32', 'Scanner32'),
+    3.4: ('uncompyle6.scanners.scanner34', 'Scanner34'),
+}
+
+def disassemble_code(version, co, out=None):
+    """
+    Disassemble code object 'co' and write the listing to 'out'.
+
+    version   bytecode version of 'co'; must be a key of SCANNERS
+    co        code object to disassemble
+    out       stream to write to (sys.stdout when None)
+
+    Raises RuntimeError when no scanner is known for 'version'.
+    """
+
+    assert isinstance(co, types.CodeType)
+
+    real_out = out or sys.stdout
+    print('# Python %s' % version, file=real_out)
+    if co.co_filename:
+        print('# Embedded file name: %s' % co.co_filename,
+              file=real_out)
+
+    # Pick up appropriate scanner. Previously an unknown version fell
+    # through the if/elif chain and died with a NameError on 'scanner'.
+    try:
+        module_name, class_name = SCANNERS[version]
+    except KeyError:
+        raise RuntimeError('Unsupported Python bytecode version %s' % version)
+    scan = importlib.import_module(module_name)
+    scanner = getattr(scan, class_name)()
+
+    # The returned tokens are not used here; presumably the listing is
+    # emitted by the scanner itself once show-asm is switched on.
+    scanner.setShowAsm(True, out)
+    scanner.disassemble(co)
+
+
+def disassemble_file(filename, outstream=None, showasm=False, showast=False):
+    """
+    disassemble Python byte-code file (.pyc)
+
+    'showasm' and 'showast' are accepted but currently unused.
+    """
+    version, co = uncompyle6.load_module(filename)
+    # 'co' is either a single code object or a list of them.
+    code_objects = co if isinstance(co, list) else [co]
+    for code_object in code_objects:
+        disassemble_code(version, code_object, outstream)
+
+def disassemble_files(in_base, out_base, files, outfile=None):
+    """
+    in_base    base directory for input files
+    out_base   base directory for output files (ignored when 'outfile'
+               names a single output file)
+    files      list of filenames to be disassembled (relative to in_base)
+    outfile    write output to this filename (overrides out_base);
+               '-' means stdout, an existing directory is used as out_base
+
+    For redirecting output to
+    - a single file          outfile=FILE (out_base is ignored)
+    - files below out_base   out_base=...
+    - stdout                 out_base=None, outfile=None
+    """
+    def _get_outstream(outfile):
+        """Open 'outfile' for writing, creating its directory if needed."""
+        out_dir = os.path.dirname(outfile)
+        failed_file = outfile + '_failed'
+        if os.path.exists(failed_file):
+            os.remove(failed_file)
+        try:
+            os.makedirs(out_dir)
+        except OSError:
+            # directory already exists (or out_dir is empty)
+            pass
+        return open(outfile, 'w')
+
+    if outfile == '-':
+        outfile = None # use stdout
+    elif outfile and os.path.isdir(outfile):
+        out_base = outfile; outfile = None
+
+    # Capture the explicit output file only AFTER normalizing: 'outfile' is
+    # reassigned per input file below, and testing the raw argument here
+    # used to end up calling _get_outstream(None).
+    of = outfile
+
+    for filename in files:
+        infile = os.path.join(in_base, filename)
+        # print (infile, file=sys.stderr)
+
+        if of: # outfile was given as parameter
+            outstream = _get_outstream(outfile)
+        elif out_base is None:
+            outstream = sys.stdout
+        else:
+            outfile = os.path.join(out_base, filename) + '_dis'
+            outstream = _get_outstream(outfile)
+        # print(outfile, file=sys.stderr)
+
+        # try to disassemble the input file
+        try:
+            disassemble_file(infile, outstream, showasm=True, showast=False)
+        except KeyboardInterrupt:
+            # don't leave a half-written listing behind
+            if outfile:
+                outstream.close()
+                os.remove(outfile)
+            raise
+        except Exception:
+            sys.stderr.write("\n# Can't disassemble %s\n" % infile)
+            if outfile:
+                outstream.close()
+                os.rename(outfile, outfile + '_failed')
+            else:
+                import traceback
+                traceback.print_exc()
+        else: # disassembly successful
+            if outfile:
+                outstream.close()
+            else:
+                print('\n# okay disassembling', infile)
+            sys.stdout.flush()
+
+    if outfile:
+        sys.stdout.write("\n")
+        sys.stdout.flush()
+    return
+
+if sys.version[:3] != '2.7' and sys.version[:3] != '3.4':
+    print('Error: pydisassemble requires Python 2.7 or 3.4.', file=sys.stderr)
+    sys.exit(-1)
+
+outfile = '-'
+out_base = None
+
+
+try:
+    opts, files = getopt.getopt(sys.argv[1:], 'ho:', ['help'])
+except getopt.GetoptError as e:
+    print('%s: %s' % (os.path.basename(sys.argv[0]), e), file=sys.stderr)
+    sys.exit(-1)
+
+for opt, val in opts:
+    if opt in ('-h', '--help'):
+        print(__doc__)
+        sys.exit(0)
+    elif opt == '-o':
+        outfile = val
+    else:
+        print(opt)
+        print(Usage_short)
+        sys.exit(1)
+
+# Nothing to do without input files; say so instead of exiting silently.
+if not files:
+    print(Usage_short, file=sys.stderr)
+    sys.exit(1)
+
+# argl, commonprefix works on strings, not on path parts,
+# thus we must handle the case with files in 'some/classes'
+# and 'some/cmds'
+src_base = os.path.commonprefix(files)
+if src_base[-1:] != os.sep:
+    src_base = os.path.dirname(src_base)
+if src_base:
+    sb_len = len( os.path.join(src_base, '') )
+    files = [f[sb_len:] for f in files]
+    del sb_len
+
+if outfile == '-':
+    outfile = None # use stdout
+elif outfile and os.path.isdir(outfile):
+    out_base = outfile; outfile = None
+elif outfile and len(files) > 1:
+    out_base = outfile; outfile = None
+
+disassemble_files(src_base, out_base, files, outfile)
diff --git a/scripts/uncompyle6 b/scripts/uncompyle6 index cc973d76..f4b59383 100755 --- a/scripts/uncompyle6 +++ b/scripts/uncompyle6 @@ -3,9 +3,7 @@ # # Copyright (c) 2000-2002 by hartmut Goebel # -from __future__ import print_function - -''' +""" Usage: uncompyle6 [OPTIONS]... [ FILE | DIR]... Examples: @@ -18,11 +16,11 @@ Options: if multiple input files are decompiled, the common prefix is stripped from these names and the remainder appended to - uncompyle -o /tmp bla/fasel.pyc bla/foo.pyc + uncompyle6 -o /tmp bla/fasel.pyc bla/foo.pyc -> /tmp/fasel.pyc_dis, /tmp/foo.pyc_dis - uncompyle -o /tmp bla/fasel.pyc bar/foo.pyc + uncompyle6 -o /tmp bla/fasel.pyc bar/foo.pyc -> /tmp/bla/fasel.pyc_dis, /tmp/bar/foo.pyc_dis - uncompyle -o /tmp /usr/lib/python1.5 + uncompyle6 -o /tmp /usr/lib/python1.5 -> /tmp/smtplib.pyc_dis ... /tmp/lib-tk/FixTk.pyc_dis -c attempts a disassembly after compiling -d do not print timestamps @@ -40,7 +38,9 @@ Extensions of generated files: '.pyc_dis' '.pyo_dis' successfully decompiled (and verified if --verify) + '_unverified' successfully decompile but --verify failed + '_failed' decompile failed (contact author for enhancement) -''' +""" + +from __future__ import print_function Usage_short = \ "uncompyle6 [--help] [--verify] [--showasm] [--showast] [-o ] FILE|DIR..."
@@ -54,7 +54,8 @@ if sys.version[:3] != '2.7' and sys.version[:3] != '3.4': print('Error: uncompyle6 requires Python 2.7 or 3.4.', file=sys.stderr) sys.exit(-1) -showasm = showast = do_verify = numproc = recurse_dirs = 0 +showasm = showast = do_verify = recurse_dirs = False +numproc = 0 outfile = '-' out_base = None codes = [] @@ -73,13 +74,13 @@ for opt, val in opts: print(__doc__) sys.exit(0) elif opt == '--verify': - do_verify = 1 + do_verify = True elif opt in ('--showasm', '-a'): - showasm = 1 - do_verify = 0 + showasm = True + do_verify = False elif opt in ('--showast', '-t'): - showast = 1 - do_verify = 0 + showast = True + do_verify = False elif opt == '-o': outfile = val elif opt == '-d': @@ -89,7 +90,7 @@ for opt, val in opts: elif opt == '-p': numproc = int(val) elif opt == '-r': - recurse_dirs = 1 + recurse_dirs = True else: print(opt) print(Usage_short) diff --git a/test/Makefile b/test/Makefile index 17eb0c3c..f8ac49f7 100644 --- a/test/Makefile +++ b/test/Makefile @@ -38,4 +38,4 @@ clean-unverified: #: Clean temporary compile/decompile/verify direcotries in /tmp clean-py-dis: - rm -fr /tmp/py-dis-* || true + rm -fvr /tmp/py-dis-* || true diff --git a/uncompyle6/__init__.py b/uncompyle6/__init__.py index 64da7eaa..6d6a11b8 100644 --- a/uncompyle6/__init__.py +++ b/uncompyle6/__init__.py @@ -42,7 +42,7 @@ __all__ = ['uncompyle_file', 'main'] def _load_file(filename): ''' load a Python source file and compile it to byte-code - _load_module(filename: string): code_object + _load_file(filename: string): code_object filename: name of file containing Python source code (normally a .py) code_object: code_object compiled from this source code @@ -58,10 +58,10 @@ def _load_file(filename): fp.close() return co -def _load_module(filename): +def load_module(filename): ''' load a module without importing it - _load_module(filename: string): code_object + load_module(filename: string): code_object filename: name of file containing Python byte-code object (normally 
a .pyc) code_object: code_object from this file @@ -100,10 +100,10 @@ def _load_module(filename): return version, co -def uncompyle(version, co, out=None, showasm=0, showast=0): - ''' - diassembles a given code block 'co' - ''' +def uncompyle(version, co, out=None, showasm=False, showast=False): + """ + disassembles and deparses a given code block 'co' + """ assert isinstance(co, types.CodeType) @@ -160,11 +160,11 @@ def uncompyle(version, co, out=None, showasm=0, showast=0): if walk.ERROR: raise walk.ERROR -def uncompyle_file(filename, outstream=None, showasm=0, showast=0): +def uncompyle_file(filename, outstream=None, showasm=False, showast=False): """ decompile Python byte-code file (.pyc) """ - version, co = _load_module(filename) + version, co = load_module(filename) if type(co) == list: for con in co: uncompyle(version, con, outstream, showasm, showast) @@ -174,9 +174,9 @@ def uncompyle_file(filename, outstream=None, showasm=0, showast=0): # ---- main ---- -if sys.platform.startswith('linux') and os.uname()[2][:2] == '2.': +if sys.platform.startswith('linux') and os.uname()[2][:2] in ['2.', '3.', '4.']: def __memUsage(): mi = open('/proc/self/stat', 'r') mu = mi.readline().split()[22] mi.close() return int(mu) / 1000000 @@ -202,7 +202,7 @@ def status_msg(do_verify, tot_files, okay_files, failed_files, def main(in_base, out_base, files, codes, outfile=None, - showasm=0, showast=0, do_verify=0): + showasm=False, showast=False, do_verify=False): ''' in_base base directory for input files out_base base directory for output files (ignored when @@ -234,8 +234,8 @@ def main(in_base, out_base, files, codes, outfile=None, # co = compile(f.read(), "", "exec") # uncompyle(sys.version[:3], co, sys.stdout, showasm=showasm, showast=showast) - for file in files: - infile = os.path.join(in_base, file) + for filename in files: + infile = os.path.join(in_base, filename) # print (infile, file=sys.stderr) if of: # outfile was given as
parameter @@ -243,7 +243,7 @@ def main(in_base, out_base, files, codes, outfile=None, elif out_base is None: outstream = sys.stdout else: - outfile = os.path.join(out_base, file) + '_dis' + outfile = os.path.join(out_base, filename) + '_dis' outstream = _get_outstream(outfile) # print(outfile, file=sys.stderr) @@ -282,7 +282,10 @@ def main(in_base, out_base, files, codes, outfile=None, print(e, file=sys.stderr) else: okay_files += 1 - if not outfile: print('\n# okay decompyling', infile, __memUsage()) + if not outfile: + mess = '\n# okay decompyling' + # mem_usage = __memUsage() + print(mess, infile) if outfile: sys.stdout.write("%s\r" % status_msg(do_verify, tot_files, okay_files, failed_files, verify_failed_files)) diff --git a/uncompyle6/verify.py b/uncompyle6/verify.py index f1dc9b2b..a38adee6 100755 --- a/uncompyle6/verify.py +++ b/uncompyle6/verify.py @@ -338,14 +338,14 @@ class Token(scanner.Token): def compare_code_with_srcfile(pyc_filename, src_filename): """Compare a .pyc with a source code file.""" - version, code_obj1 = uncompyle6._load_module(pyc_filename) + version, code_obj1 = uncompyle6.load_module(pyc_filename) code_obj2 = uncompyle6._load_file(src_filename) cmp_code_objects(version, code_obj1, code_obj2) def compare_files(pyc_filename1, pyc_filename2): """Compare two .pyc files.""" - version, code_obj1 = uncompyle6._load_module(pyc_filename1) - version, code_obj2 = uncompyle6._load_module(pyc_filename2) + version, code_obj1 = uncompyle6.load_module(pyc_filename1) + version, code_obj2 = uncompyle6.load_module(pyc_filename2) cmp_code_objects(version, code_obj1, code_obj2) if __name__ == '__main__':