From 9cdcdfd305e83345d2d96651385a14144b463c43 Mon Sep 17 00:00:00 2001 From: rocky Date: Sun, 20 Dec 2015 23:03:35 -0500 Subject: [PATCH] Part of a much needed cleanup. Move semantics routines into its own directory. Move out lots of stuff from __init__ to their own files. Add file loading tests. Document AST handling a tad more complete. --- Makefile | 12 +- bin/uncompyle6 | 29 +- pytest/test_load.py | 8 + test/Makefile | 4 +- test/test_pythonlib.py | 8 +- uncompyle6/__init__.py | 276 +----------------- uncompyle6/disas.py | 30 +- uncompyle6/load.py | 106 +++++++ uncompyle6/main.py | 200 +++++++++++++ uncompyle6/semantics/__init__.py | 0 .../{deparser.py => semantics/fragments.py} | 174 ++++------- .../{walker.py => semantics/pysource.py} | 49 +++- uncompyle6/verify.py | 5 +- 13 files changed, 456 insertions(+), 445 deletions(-) create mode 100644 pytest/test_load.py create mode 100644 uncompyle6/load.py create mode 100644 uncompyle6/main.py create mode 100644 uncompyle6/semantics/__init__.py rename uncompyle6/{deparser.py => semantics/fragments.py} (90%) rename uncompyle6/{walker.py => semantics/pysource.py} (97%) diff --git a/Makefile b/Makefile index 87aeb8bf..5d99c686 100644 --- a/Makefile +++ b/Makefile @@ -18,16 +18,8 @@ TEST_TYPES=check-long check-short check-2.7 check-3.4 #: Default target - same as "check" all: check -all test check check_long: - @$(PYTHON) -V && PYTHON_VERSION=`$(PYTHON) -V 2>&1 | cut -d ' ' -f 2 | cut -d'.' 
-f1,2`; \ - $(MAKE) check-$$PYTHON_VERSION - -#: Run working tests from Python 2.7 -check-2.7: pytest - $(MAKE) -C test $@ - -#: Run working tests from Python 3.4 -check-3.4: +#: Run working tests +check check-3.4 check-2.7: pytest $(MAKE) -C test $@ #: Run py.test tests diff --git a/bin/uncompyle6 b/bin/uncompyle6 index c162d341..f26d5a48 100755 --- a/bin/uncompyle6 +++ b/bin/uncompyle6 @@ -2,7 +2,8 @@ # Mode: -*- python -*- # # Copyright (c) 2000-2002 by hartmut Goebel -# +# Copyright (c) 2015 by Rocky Bernstein + """ Usage: uncompyle6 [OPTIONS]... [ FILE | DIR]... @@ -41,15 +42,19 @@ Extensions of generated files: """ from __future__ import print_function -import sys, os, getopt +import sys, os, getopt, time program = os.path.basename(__file__) -Usage_short = \ -"%s [--help] [--verify] [--showasm] [--showast] [-o ] FILE|DIR..." % program +from uncompyle6 import verify, check_python_version +from uncompyle6.main import main, status_msg + +def usage(): + print("""usage: + %s [--help] [--verify] [--showasm] [--showast] [-o ] FILE|DIR... 
+""" % program) + sys.exit(1) -from uncompyle6 import main, status_msg, verify, check_python_version -import time check_python_version(program) @@ -91,9 +96,8 @@ for opt, val in opts: elif opt == '-r': recurse_dirs = True else: - print(opt) - print(Usage_short) - sys.exit(1) + print(opt, file=sys.stderr) + usage() # expand directory if specified if recurse_dirs: @@ -117,6 +121,11 @@ if src_base: files = [f[sb_len:] for f in files] del sb_len +if not files: + print("No files given", file=sys.stderr) + usage() + + if outfile == '-': outfile = None # use stdout elif outfile and os.path.isdir(outfile): @@ -162,7 +171,7 @@ else: if f is None: break (t, o, f, v) = \ - main(src_base, out_base, [f], codes, outfile, showasm, showast, do_verify) + main(src_base, out_base, [f], codes, outfile, showasm, showast, do_verify) tot_files += t okay_files += o failed_files += f diff --git a/pytest/test_load.py b/pytest/test_load.py new file mode 100644 index 00000000..ed2878be --- /dev/null +++ b/pytest/test_load.py @@ -0,0 +1,8 @@ +from uncompyle6.load import load_file, check_object_path, load_module + +def test_load(): + """Basic test of load_file, check_object_path and load_module""" + co = load_file(__file__) + obj_path = check_object_path(__file__) + co2 = load_module(obj_path) + assert co == co2[2] diff --git a/test/Makefile b/test/Makefile index 7aee7c0f..72bd9aba 100644 --- a/test/Makefile +++ b/test/Makefile @@ -24,13 +24,15 @@ check-2.7: check-bytecode check-2.7-ok #: Run working tests from Python 3.4 check-3.4: check-bytecode + $(PYTHON) test_pythonlib.py --bytecode-3.4 #: Check deparsing only, but from a different Python version check-disasm: $(PYTHON) dis-compare.py #: Check deparsing bytecode only -check-bytecode: check-bytecode-2.5 check-bytecode-2.5 check-bytecode-3.2 +check-bytecode: + $(PYTHON) test_pythonlib.py --bytecode-2.5 --bytecode-2.7 --bytecode-3.2 #: Check deparsing Python 2.5 check-bytecode-2.5: diff --git a/test/test_pythonlib.py b/test/test_pythonlib.py 
index 0453c57d..b124d2c9 100755 --- a/test/test_pythonlib.py +++ b/test/test_pythonlib.py @@ -31,7 +31,8 @@ from __future__ import print_function import getopt, os, py_compile, sys, shutil, tempfile, time -from uncompyle6 import main, PYTHON_VERSION +from uncompyle6 import PYTHON_VERSION +from uncompyle6.main import main from fnmatch import fnmatch def get_srcdir(): @@ -212,10 +213,7 @@ if __name__ == '__main__': print("Can't find directory %s. Skipping" % src_dir, file=sys.stderr) continue - if last_compile_version and last_compile_version != compiled_version: - print("Warning: mixed python version decompylation") - else: - last_compile_version = compiled_version + last_compile_version = compiled_version pass if not checked_dirs: diff --git a/uncompyle6/__init__.py b/uncompyle6/__init__.py index a4b4a2e7..0e622162 100644 --- a/uncompyle6/__init__.py +++ b/uncompyle6/__init__.py @@ -28,19 +28,10 @@ from __future__ import print_function -import imp, os, marshal, sys, types +import sys -# set before importing scanner PYTHON3 = (sys.version_info >= (3, 0)) -import uncompyle6 -from uncompyle6.scanner import get_scanner -from uncompyle6.disas import check_object_path -import uncompyle6.marsh -from uncompyle6 import walker, verify, magics - -sys.setrecursionlimit(5000) - # We do this crazy way to support Python 2.6 which # doesn't support version_major, and has a bug in # floating point so we can't divide 26 by 10 and get @@ -48,6 +39,8 @@ sys.setrecursionlimit(5000) PYTHON_VERSION = sys.version_info[0]+ (sys.version_info[1] / 10.0) PYTHON_VERSION_STR = "%s.%s" % (sys.version_info[0], sys.version_info[1]) +sys.setrecursionlimit(5000) + def check_python_version(program): if not (sys.version_info[0:2] in ((2,6), (2,7), (3,4))): print('Error: %s requires %s Python 2.6, 2.7 or 3.4' % program, @@ -55,261 +48,12 @@ def check_python_version(program): sys.exit(-1) return -__all__ = ['uncompyle_file', 'main'] +import uncompyle6.semantics.pysource +import 
uncompyle6.semantics.fragments -def _load_file(filename): - ''' - load a Python source file and compile it to byte-code - _load_file(filename: string): code_object - filename: name of file containing Python source code - (normally a .py) - code_object: code_object compiled from this source code - This function does NOT write any file! - ''' - fp = open(filename, 'rb') - source = fp.read().decode('utf-8') + '\n' - try: - co = compile(source, filename, 'exec', dont_inherit=True) - except SyntaxError: - print('>>Syntax error in %s\n' % filename, file= sys.stderr) - raise - fp.close() - return co +# Convenience functions so you can say: +# from uncompyle6 import deparse_code and +# from uncompyle6 import deparse_fragments -def load_module(filename): - """ - load a module without importing it. - load_module(filename: string): version, magic_int, code_object - - filename: name of file containing Python byte-code object - (normally a .pyc) - - code_object: code_object from this file - version: Python major/minor value e.g. 2.7. or 3.4 - magic_int: more specific than version. The actual byte code version of the - code object - """ - - with open(filename, 'rb') as fp: - magic = fp.read(4) - try: - version = float(magics.versions[magic]) - except KeyError: - raise ImportError("Unknown magic number %s in %s" % - (ord(magic[0])+256*ord(magic[1]), filename)) - if not (2.5 <= version <= 2.7) and not (3.2 <= version <= 3.4): - raise ImportError("This is a Python %s file! Only " - "Python 2.5 to 2.7 and 3.2 to 3.4 files are supported." - % version) - - # print version - fp.read(4) # timestamp - magic_int = magics.magic2int(magic) - my_magic_int = magics.magic2int(imp.get_magic()) - - if my_magic_int == magic_int: - # Note: a higher magic number necessarily mean a later - # release. At Python 3.0 the magic number decreased - # significantly. Hence the range below. Also note - # inclusion of the size info, occurred within a - # Python magor/minor release. 
Hence the test on the - # magic value rather than PYTHON_VERSION - if 3200 <= magic_int < 20121: - fp.read(4) # size mod 2**32 - bytecode = fp.read() - co = marshal.loads(bytecode) - else: - co = uncompyle6.marsh.load_code(fp, magic_int) - pass - - return version, magic_int, co - -def uncompyle(version, co, out=None, showasm=False, showast=False): - """ - disassembles and deparses a given code block 'co' - """ - - assert isinstance(co, types.CodeType) - - # store final output stream for case of error - real_out = out or sys.stdout - print('# Python %s' % version, file=real_out) - if co.co_filename: - print('# Embedded file name: %s' % co.co_filename, - file=real_out) - - scanner = get_scanner(version) - tokens, customize = scanner.disassemble(co) - - if showasm: - for t in tokens: - print(t, file=real_out) - print(file=out) - - # Build AST from disassembly. - walk = walker.Walker(version, out, scanner, showast=showast) - try: - ast = walk.build_ast(tokens, customize) - except walker.ParserError as e : # parser failed, dump disassembly - print(e, file=real_out) - raise - del tokens # save memory - - # convert leading '__doc__ = "..." 
into doc string - assert ast == 'stmts' - try: - if ast[0][0] == walker.ASSIGN_DOC_STRING(co.co_consts[0]): - walk.print_docstring('', co.co_consts[0]) - del ast[0] - if ast[-1] == walker.RETURN_NONE: - ast.pop() # remove last node - # todo: if empty, add 'pass' - except: - pass - walk.mod_globs = walker.find_globals(ast, set()) - walk.gen_source(ast, customize) - for g in walk.mod_globs: - walk.write('global %s ## Warning: Unused global' % g) - if walk.ERROR: - raise walk.ERROR - -def uncompyle_file(filename, outstream=None, showasm=False, showast=False): - """ - decompile Python byte-code file (.pyc) - """ - check_object_path(filename) - version, magic_int, co = load_module(filename) - if type(co) == list: - for con in co: - uncompyle(version, con, outstream, showasm, showast) - else: - uncompyle(version, co, outstream, showasm, showast) - co = None - -# ---- main ---- - -if sys.platform.startswith('linux') and os.uname()[2][:2] in ['2.', '3.', '4.']: - def __memUsage(): - mi = open('/proc/self/stat', 'r') - mu = mi.readline().split()[22] - mi.close() - return int(mu) / 1000000 -else: - def __memUsage(): - return '' - -def status_msg(do_verify, tot_files, okay_files, failed_files, - verify_failed_files): - if tot_files == 1: - if failed_files: - return "decompile failed" - elif verify_failed_files: - return "decompile verify failed" - else: - return "Successfully decompiled file" - pass - pass - mess = "decompiled %i files: %i okay, %i failed" % (tot_files, okay_files, failed_files) - if do_verify: - mess += (", %i verify failed" % verify_failed_files) - return mess - - -def main(in_base, out_base, files, codes, outfile=None, - showasm=False, showast=False, do_verify=False): - ''' - in_base base directory for input files - out_base base directory for output files (ignored when - files list of filenames to be uncompyled (relative to src_base) - outfile write output to this filename (overwrites out_base) - - For redirecting output to - - outfile= (out_base is 
ignored) - - files below out_base out_base=... - - stdout out_base=None, outfile=None - ''' - def _get_outstream(outfile): - dir = os.path.dirname(outfile) - failed_file = outfile + '_failed' - if os.path.exists(failed_file): - os.remove(failed_file) - try: - os.makedirs(dir) - except OSError: - pass - return open(outfile, 'w') - - of = outfile - tot_files = okay_files = failed_files = verify_failed_files = 0 - - # for code in codes: - # version = sys.version[:3] # "2.5" - # with open(code, "r") as f: - # co = compile(f.read(), "", "exec") - # uncompyle(sys.version[:3], co, sys.stdout, showasm=showasm, showast=showast) - - for filename in files: - infile = os.path.join(in_base, filename) - # print (infile, file=sys.stderr) - - if of: # outfile was given as parameter - outstream = _get_outstream(outfile) - elif out_base is None: - outstream = sys.stdout - else: - outfile = os.path.join(out_base, filename) + '_dis' - outstream = _get_outstream(outfile) - # print(outfile, file=sys.stderr) - - # try to decomyple the input file - try: - uncompyle_file(infile, outstream, showasm, showast) - tot_files += 1 - except ValueError as e: - sys.stderr.write("\n# %s" % e) - failed_files += 1 - except KeyboardInterrupt: - if outfile: - outstream.close() - os.remove(outfile) - sys.stderr.write("\nLast file: %s " % (infile)) - raise - except: - failed_files += 1 - if outfile: - outstream.close() - os.rename(outfile, outfile + '_failed') - else: - sys.stderr.write("\n# Can't uncompyle %s\n" % infile) - else: # uncompyle successfull - if outfile: - outstream.close() - if do_verify: - try: - msg = verify.compare_code_with_srcfile(infile, outfile) - if not outfile: - if not msg: - print('\n# okay decompyling %s' % infile) - okay_files += 1 - else: - print('\n# %s\n\t%s', infile, msg) - except verify.VerifyCmpError as e: - verify_failed_files += 1 - os.rename(outfile, outfile + '_unverified') - if not outfile: - print("### Error Verifiying %s" % filename, file=sys.stderr) - print(e, 
file=sys.stderr) - else: - okay_files += 1 - if not outfile: - mess = '\n# okay decompyling' - # mem_usage = __memUsage() - print(mess, infile) - if outfile: - sys.stdout.write("%s\r" % - status_msg(do_verify, tot_files, okay_files, failed_files, verify_failed_files)) - sys.stdout.flush() - if outfile: - sys.stdout.write("\n") - sys.stdout.flush() - return (tot_files, okay_files, failed_files, verify_failed_files) +deparse_code = uncompyle6.semantics.pysource.deparse_code +deparse_fragments = uncompyle6.semantics.fragments.deparse_code diff --git a/uncompyle6/disas.py b/uncompyle6/disas.py index 866caf77..2a0feff8 100644 --- a/uncompyle6/disas.py +++ b/uncompyle6/disas.py @@ -18,35 +18,11 @@ want to run on Python 2.7. from __future__ import print_function -import inspect, os, py_compile, sys, tempfile +import inspect, os, sys import uncompyle6 -from uncompyle6 import PYTHON3 from uncompyle6.scanner import get_scanner - -def check_object_path(path): - if path.endswith(".py"): - try: - import importlib - return importlib.util.cache_from_source(path, - optimization='') - except: - try: - import imp - imp.cache_from_source(path, debug_override=False) - except: - pass - pass - basename = os.path.basename(path)[0:-3] - spath = path if PYTHON3 else path.decode('utf-8') - path = tempfile.mkstemp(prefix=basename + '-', - suffix='.pyc', text=False)[1] - py_compile.compile(spath, cfile=path) - - if not path.endswith(".pyc") and not path.endswith(".pyo"): - raise ValueError("path %s must point to a .py or .pyc file" % - path) - return path +from uncompyle6.load import check_object_path, load_module def disco(version, co, out=None): """ @@ -78,7 +54,7 @@ def disassemble_file(filename, outstream=None): try to find the corresponding compiled object. 
""" filename = check_object_path(filename) - version, magic_int, co = uncompyle6.load_module(filename) + version, magic_int, co = load_module(filename) if type(co) == list: for con in co: disco(version, con, outstream) diff --git a/uncompyle6/load.py b/uncompyle6/load.py new file mode 100644 index 00000000..b08887fa --- /dev/null +++ b/uncompyle6/load.py @@ -0,0 +1,106 @@ +# Copyright (c) 2000 by hartmut Goebel +# Copyright (c) 2015 by Rocky Bernstein +from __future__ import print_function + +import imp, marshal, os, py_compile, sys, tempfile + +import uncompyle6.marsh +from uncompyle6 import PYTHON3 +from uncompyle6 import magics + +def check_object_path(path): + if path.endswith(".py"): + try: + import importlib + return importlib.util.cache_from_source(path, + optimization='') + except: + try: + import imp + imp.cache_from_source(path, debug_override=False) + except: + pass + pass + basename = os.path.basename(path)[0:-3] + spath = path if PYTHON3 else path.decode('utf-8') + path = tempfile.mkstemp(prefix=basename + '-', + suffix='.pyc', text=False)[1] + py_compile.compile(spath, cfile=path) + + if not path.endswith(".pyc") and not path.endswith(".pyo"): + raise ValueError("path %s must point to a .py or .pyc file\n" % + path) + return path + +def load_file(filename): + ''' + load a Python source file and compile it to byte-code + _load_file(filename: string): code_object + filename: name of file containing Python source code + (normally a .py) + code_object: code_object compiled from this source code + This function does NOT write any file! + ''' + fp = open(filename, 'rb') + source = fp.read().decode('utf-8') + '\n' + try: + co = compile(source, filename, 'exec', dont_inherit=True) + except SyntaxError: + print('>>Syntax error in %s\n' % filename, file= sys.stderr) + raise + fp.close() + return co + +def load_module(filename): + """ + load a module without importing it. 
+ load_module(filename: string): version, magic_int, code_object + + filename: name of file containing Python byte-code object + (normally a .pyc) + + code_object: code_object from this file + version: Python major/minor value e.g. 2.7. or 3.4 + magic_int: more specific than version. The actual byte code version of the + code object + """ + + with open(filename, 'rb') as fp: + magic = fp.read(4) + try: + version = float(magics.versions[magic]) + except KeyError: + raise ImportError("Unknown magic number %s in %s" % + (ord(magic[0])+256*ord(magic[1]), filename)) + if not (2.5 <= version <= 2.7) and not (3.2 <= version <= 3.4): + raise ImportError("This is a Python %s file! Only " + "Python 2.5 to 2.7 and 3.2 to 3.4 files are supported." + % version) + + # print version + fp.read(4) # timestamp + magic_int = magics.magic2int(magic) + my_magic_int = magics.magic2int(imp.get_magic()) + + if my_magic_int == magic_int: + # Note: a higher magic number doesn't necessarily mean a later + # release. At Python 3.0 the magic number decreased + # significantly. Hence the range below. Also note + # inclusion of the size info, occurred within a + # Python major/minor release. 
Hence the test on the + # magic value rather than PYTHON_VERSION + if 3200 <= magic_int < 20121: + fp.read(4) # size mod 2**32 + bytecode = fp.read() + co = marshal.loads(bytecode) + else: + co = uncompyle6.marsh.load_code(fp, magic_int) + pass + + return version, magic_int, co + +if __name__ == '__main__': + co = load_file(__file__) + obj_path = check_object_path(__file__) + co2 = load_module(obj_path) + assert co == co2[2] diff --git a/uncompyle6/main.py b/uncompyle6/main.py new file mode 100644 index 00000000..151db24b --- /dev/null +++ b/uncompyle6/main.py @@ -0,0 +1,200 @@ +from __future__ import print_function +import os, sys, types + +from uncompyle6.disas import check_object_path +from uncompyle6 import verify +from uncompyle6.semantics import pysource + +from uncompyle6.scanner import get_scanner +from uncompyle6.load import load_module + +# FIXME: remove duplicate code from deparse_code +def uncompyle(version, co, out=None, showasm=False, showast=False): + """ + disassembles and deparses a given code block 'co' + """ + + assert isinstance(co, types.CodeType) + + # store final output stream for case of error + real_out = out or sys.stdout + print('# Python %s' % version, file=real_out) + if co.co_filename: + print('# Embedded file name: %s' % co.co_filename, + file=real_out) + + scanner = get_scanner(version) + tokens, customize = scanner.disassemble(co) + + if showasm: + for t in tokens: + print(t, file=real_out) + print(file=out) + + # Build AST from disassembly. + walk = pysource.Walker(version, out, scanner, showast=showast) + try: + ast = walk.build_ast(tokens, customize) + except pysource.ParserError as e : # parser failed, dump disassembly + print(e, file=real_out) + raise + del tokens # save memory + + # convert leading '__doc__ = "..." 
into doc string + assert ast == 'stmts' + try: + if ast[0][0] == pysource.ASSIGN_DOC_STRING(co.co_consts[0]): + walk.print_docstring('', co.co_consts[0]) + del ast[0] + if ast[-1] == pysource.RETURN_NONE: + ast.pop() # remove last node + # todo: if empty, add 'pass' + except: + pass + walk.mod_globs = pysource.find_globals(ast, set()) + walk.gen_source(ast, customize) + for g in walk.mod_globs: + walk.write('global %s ## Warning: Unused global' % g) + if walk.ERROR: + raise walk.ERROR + +def uncompyle_file(filename, outstream=None, showasm=False, showast=False): + """ + decompile Python byte-code file (.pyc) + """ + check_object_path(filename) + version, magic_int, co = load_module(filename) + if type(co) == list: + for con in co: + uncompyle(version, con, outstream, showasm, showast) + else: + uncompyle(version, co, outstream, showasm, showast) + co = None + +def main(in_base, out_base, files, codes, outfile=None, + showasm=False, showast=False, do_verify=False): + ''' + in_base base directory for input files + out_base base directory for output files (ignored when + files list of filenames to be uncompyled (relative to src_base) + outfile write output to this filename (overwrites out_base) + + For redirecting output to + - outfile= (out_base is ignored) + - files below out_base out_base=... 
+ - stdout out_base=None, outfile=None + ''' + def _get_outstream(outfile): + dir = os.path.dirname(outfile) + failed_file = outfile + '_failed' + if os.path.exists(failed_file): + os.remove(failed_file) + try: + os.makedirs(dir) + except OSError: + pass + return open(outfile, 'w') + + of = outfile + tot_files = okay_files = failed_files = verify_failed_files = 0 + + # for code in codes: + # version = sys.version[:3] # "2.5" + # with open(code, "r") as f: + # co = compile(f.read(), "", "exec") + # uncompyle(sys.version[:3], co, sys.stdout, showasm=showasm, showast=showast) + + for filename in files: + infile = os.path.join(in_base, filename) + # print (infile, file=sys.stderr) + + if of: # outfile was given as parameter + outstream = _get_outstream(outfile) + elif out_base is None: + outstream = sys.stdout + else: + outfile = os.path.join(out_base, filename) + '_dis' + outstream = _get_outstream(outfile) + # print(outfile, file=sys.stderr) + + # try to decomyple the input file + try: + uncompyle_file(infile, outstream, showasm, showast) + tot_files += 1 + except ValueError as e: + sys.stderr.write("\n# %s" % e) + failed_files += 1 + except KeyboardInterrupt: + if outfile: + outstream.close() + os.remove(outfile) + sys.stderr.write("\nLast file: %s " % (infile)) + raise + except: + failed_files += 1 + if outfile: + outstream.close() + os.rename(outfile, outfile + '_failed') + else: + sys.stderr.write("\n# Can't uncompyle %s\n" % infile) + else: # uncompyle successfull + if outfile: + outstream.close() + if do_verify: + try: + msg = verify.compare_code_with_srcfile(infile, outfile) + if not outfile: + if not msg: + print('\n# okay decompyling %s' % infile) + okay_files += 1 + else: + print('\n# %s\n\t%s', infile, msg) + except verify.VerifyCmpError as e: + verify_failed_files += 1 + os.rename(outfile, outfile + '_unverified') + if not outfile: + print("### Error Verifiying %s" % filename, file=sys.stderr) + print(e, file=sys.stderr) + else: + okay_files += 1 + if not 
outfile: + mess = '\n# okay decompyling' + # mem_usage = __memUsage() + print(mess, infile) + if outfile: + sys.stdout.write("%s\r" % + status_msg(do_verify, tot_files, okay_files, failed_files, verify_failed_files)) + sys.stdout.flush() + if outfile: + sys.stdout.write("\n") + sys.stdout.flush() + return (tot_files, okay_files, failed_files, verify_failed_files) + + +# ---- main ---- + +if sys.platform.startswith('linux') and os.uname()[2][:2] in ['2.', '3.', '4.']: + def __memUsage(): + mi = open('/proc/self/stat', 'r') + mu = mi.readline().split()[22] + mi.close() + return int(mu) / 1000000 +else: + def __memUsage(): + return '' + +def status_msg(do_verify, tot_files, okay_files, failed_files, + verify_failed_files): + if tot_files == 1: + if failed_files: + return "decompile failed" + elif verify_failed_files: + return "decompile verify failed" + else: + return "Successfully decompiled file" + pass + pass + mess = "decompiled %i files: %i okay, %i failed" % (tot_files, okay_files, failed_files) + if do_verify: + mess += (", %i verify failed" % verify_failed_files) + return mess diff --git a/uncompyle6/semantics/__init__.py b/uncompyle6/semantics/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/uncompyle6/deparser.py b/uncompyle6/semantics/fragments.py similarity index 90% rename from uncompyle6/deparser.py rename to uncompyle6/semantics/fragments.py index 42ba3c85..02c6a420 100644 --- a/uncompyle6/deparser.py +++ b/uncompyle6/semantics/fragments.py @@ -2,61 +2,28 @@ # Copyright (c) 2000-2002 by hartmut Goebel # Copyright (c) 2005 by Dan Pascu # Copyright (c) 2015 by Rocky Bernstein -# See LICENSE for license """ - Deparsing saving text fragment information indexed by offset +Creates Python source code from an uncompyle6 abstract syntax tree, +and indexes fragments which can be accessed by instruction offset +address. - - Decompilation (walking AST) - - All table-driven. (rocky: well, mostly. 
I need to add more format - specifiers for say duplicating info from one node to another.) - - Step 1 determines a table (T) and a path to a - table key (K) from the node type (N) (other nodes are shown as O): - - N N N&K - / | ... \ / | ... \ / | ... \ - O O O O O K O O O - | - K - - MAP_R0 (TABLE_R0) MAP_R (TABLE_R) MAP_DIRECT (TABLE_DIRECT) - - The default is a direct mapping. The key K is then extracted from the - subtree and used to find a table entry T[K], if any. The result is a - format string and arguments (a la printf()) for the formatting engine. - Escapes in the format string are: - - %c evaluate N[A] recursively* - %C evaluate N[A[0]]..N[A[1]-1] recursively, separate by A[2]* - %P same as %C but sets operator precedence - %, print ',' if last %C only printed one item (for tuples--unused) - %| tab to current indentation level - %+ increase current indentation level - %- decrease current indentation level - %{...} evaluate ... in context of N - %% literal '%' - %p evaluate N setting precedence - - - * indicates an argument (A) required. - - The '%' may optionally be followed by a number (C) in square brackets, which - makes the engine walk down to N[C] before evaluating the escape code. +See the comments in pysource for information on the abstract syntax tree +and how semantic actions are written. 
""" +# FIXME: DRY code with pysource + from __future__ import print_function import inspect, re, sys from uncompyle6 import PYTHON3 -from uncompyle6 import walker +from uncompyle6.semantics import pysource from uncompyle6.parser import get_python_parser -from uncompyle6.walker import escape, PRECEDENCE, minint -from uncompyle6.walker import AST, NONE, find_all_globals -from uncompyle6.walker import find_globals, find_none, INDENT_PER_LEVEL -from uncompyle6.walker import ParserError +from uncompyle6.semantics.pysource import escape, PRECEDENCE, minint +from uncompyle6.semantics.pysource import AST, NONE, find_all_globals +from uncompyle6.semantics.pysource import find_globals, find_none, INDENT_PER_LEVEL +from uncompyle6.semantics.pysource import ParserError from uncompyle6 import parser from uncompyle6.scanner import Token, Code, get_scanner @@ -67,8 +34,7 @@ else: from itertools import izip_longest as zip_longest from StringIO import StringIO -# FIXME: remove uncompyle dups -# from uncompyle6.walker import find_all_globals, find_globals, find_none + from uncompyle6.parsers.spark import GenericASTTraversal, GenericASTTraversalPruningException from types import CodeType @@ -77,7 +43,7 @@ NodeInfo = namedtuple("NodeInfo", "node start finish") ExtractInfo = namedtuple("ExtractInfo", "lineNo lineStartOffset markerLine selectedLine selectedText") -class Traverser(walker.Walker, object): +class Traverser(pysource.Walker, object): stacked_params = ('f', 'indent', 'isLambda', '_globals') def __init__(self, version, scanner, showast=False): @@ -1186,7 +1152,7 @@ class Traverser(walker.Walker, object): pass -def deparse(version, co, out=StringIO(), showasm=False, showast=False): +def deparse_code(version, co, out=StringIO(), showasm=False, showast=False): assert inspect.iscode(co) # store final output stream for case of error __real_out = out or sys.stdout @@ -1199,7 +1165,7 @@ def deparse(version, co, out=StringIO(), showasm=False, showast=False): try: walk.ast = 
walk.build_ast_d(tokens, customize) - except walker.ParserError as e : # parser failed, dump disassembly + except pysource.ParserError as e : # parser failed, dump disassembly print(e, file=__real_out) raise @@ -1207,7 +1173,7 @@ def deparse(version, co, out=StringIO(), showasm=False, showast=False): # convert leading '__doc__ = "..." into doc string assert walk.ast == 'stmts' - walk.mod_globs = walker.find_globals(walk.ast, set()) + walk.mod_globs = pysource.find_globals(walk.ast, set()) walk.gen_source_d(walk.ast, co.co_name, customize) walk.set_pos_info(walk.ast, 0, len(walk.text)) walk.fixup_parents(walk.ast, None) @@ -1219,70 +1185,52 @@ def deparse(version, co, out=StringIO(), showasm=False, showast=False): return walk -if __name__ == '__main__': +# if __name__ == '__main__': - def deparse_test(co): - sys_version = sys.version_info.major + (sys.version_info.minor / 10.0) - walk = deparse(sys_version, co, showasm=True, showast=True) - print("deparsed source") - print(walk.text, "\n") - print('------------------------') - for name, offset in sorted(walk.offsets.keys()): - print("name %s, offset %s" % (name, offset)) - nodeInfo = walk.offsets[name, offset] - node = nodeInfo.node - extractInfo = walk.extract_node_info(node) - print("code: %s" % node.type) - # print extractInfo - print(extractInfo.selectedText) - print(extractInfo.selectedLine) - print(extractInfo.markerLine) - extractInfo, p = walk.extract_parent_info(node) - if extractInfo: - print("Contained in...") - print(extractInfo.selectedLine) - print(extractInfo.markerLine) - print("code: %s" % p.type) - print('=' * 40) - pass - pass - return +# def deparse_test(co): +# sys_version = sys.version_info.major + (sys.version_info.minor / 10.0) +# walk = deparse_code(sys_version, co, showasm=False, showast=False) +# print("deparsed source") +# print(walk.text, "\n") +# print('------------------------') +# for name, offset in sorted(walk.offsets.keys(), +# key=lambda x: str(x[0])): +# print("name %s, offset 
%s" % (name, offset)) +# nodeInfo = walk.offsets[name, offset] +# node = nodeInfo.node +# extractInfo = walk.extract_node_info(node) +# print("code: %s" % node.type) +# # print extractInfo +# print(extractInfo.selectedText) +# print(extractInfo.selectedLine) +# print(extractInfo.markerLine) +# extractInfo, p = walk.extract_parent_info(node) +# if extractInfo: +# print("Contained in...") +# print(extractInfo.selectedLine) +# print(extractInfo.markerLine) +# print("code: %s" % p.type) +# print('=' * 40) +# pass +# pass +# return - def get_code_for_fn(fn): - return fn.__code__ +# def get_code_for_fn(fn): +# return fn.__code__ - def foo(a, **options): - def bar(a, b=1, c=2): - print("a, b, c= ", a, int(b), c) - bar(a, **options) - options = {'c': 5, 'b': 10} - bar(a, **options) - return None +# def gcd(a, b): +# if a > b: +# (a, b) = (b, a) +# pass - def check_args(args): - deparse_test(inspect.currentframe().f_code) - for i in range(2): - try: - i = int(args[i]) - except ValueError: - print("** Expecting an integer, got: %s" % repr(args[i])) - sys.exit(2) - pass - pass +# if a <= 0: +# return None +# if a == 1 or a == b: +# return a +# return gcd(b-a, a) - def gcd(a, b): - if a > b: - (a, b) = (b, a) - pass - - if a <= 0: - return None - if a == 1 or a == b: - return a - return gcd(b-a, a) - - # check_args(['3', '5']) - deparse_test(get_code_for_fn(gcd)) - # deparse_test(get_code_for_fn(gcd)) - # deparse_test(get_code_for_fn(Traverser.fixup_offsets)) - # deparse_test(inspect.currentframe().f_code) +# # check_args(['3', '5']) +# deparse_test(get_code_for_fn(gcd)) +# # deparse_test(get_code_for_fn(gcd)) +# # deparse_test(get_code_for_fn(Traverser.fixup_offsets)) +# # deparse_test(inspect.currentframe().f_code) diff --git a/uncompyle6/walker.py b/uncompyle6/semantics/pysource.py similarity index 97% rename from uncompyle6/walker.py rename to uncompyle6/semantics/pysource.py index e430d80e..e9c02fb4 100644 --- a/uncompyle6/walker.py +++ b/uncompyle6/semantics/pysource.py 
@@ -1,11 +1,31 @@ # Copyright (c) 1999 John Aycock # Copyright (c) 2000-2002 by hartmut Goebel # Copyright (c) 2005 by Dan Pascu +# Copyright (c) 2015 by Rocky Bernstein """ - Decompilation (walking AST) +Creates Python source code from an uncompyle6 abstract syntax tree. - All table-driven. Step 1 determines a table (T) and a path to a +The terminal symbols are CPython bytecode instructions. (See the +Python documentation under module "dis" for a list of instructions +and what they mean.) + +The upper levels of the grammar are a more-or-less conventional grammar for +Python. + +Semantic action rules for nonterminal symbols can be table driven. +This mechanism uses a printf-like syntax to direct substitution from +attributes of the nonterminal and its children. + +The other way to specify a semantic rule is to create a method +prefaced with "n_" for that nonterminal. For example, "n_exec_stmt" +handles the semantic actions for the "exec_stmt" nonterminal symbol. + +The rest of this docstring describes how table-driven semantic actions work +and gives a list of the format specifiers. The default() and engine() +methods implement most of what is described below. + + Step 1 determines a table (T) and a path to a table key (K) from the node type (N) (other nodes are shown as O): N N N&K @@ -32,10 +52,12 @@ %% literal '%' %p evaluate N setting precedence + * indicates an argument (A) required. The '%' may optionally be followed by a number (C) in square brackets, which makes the engine walk down to N[C] before evaluating the escape code. 
+ """ from __future__ import print_function @@ -552,8 +574,8 @@ class Walker(GenericASTTraversal, object): def print_docstring(self, indent, docstring): quote = '"""' self.write(indent) - # FIXME for Python3 - if type(docstring) == unicode: + if not PYTHON3 and not isinstance(docstring, str): + # Must be unicode in Python2 self.write('u') docstring = repr(docstring.expandtabs())[2:-1] else: @@ -915,7 +937,6 @@ class Walker(GenericASTTraversal, object): p = self.prec self.prec = 27 n = node[-1] - assert n == 'list_iter' # find innerst node while n == 'list_iter': @@ -943,6 +964,7 @@ class Walker(GenericASTTraversal, object): ast = self.build_ast(code._tokens, code._customize) self.customize(code._customize) ast = ast[0][0][0] + n = ast[iter_index] assert n == 'comp_iter' # find innerst node @@ -1464,7 +1486,8 @@ class Walker(GenericASTTraversal, object): if isLambda: self.write(self.traverse(ast, isLambda=isLambda)) else: - self.print_(self.traverse(ast, isLambda=isLambda)) + self.text = self.traverse(ast, isLambda=isLambda) + self.print_(self.text) self.return_none = rn def build_ast(self, tokens, customize, isLambda=0, noneInNames=False): @@ -1505,7 +1528,11 @@ class Walker(GenericASTTraversal, object): return ast -def walker(version, co, out=sys.stdout, showasm=False, showast=False): +def deparse_code(version, co, out=sys.stdout, showasm=False, showast=False): + """ + disassembles and deparses a given code block 'co' + """ + assert inspect.iscode(co) # store final output stream for case of error __real_out = out or sys.stdout @@ -1540,9 +1567,9 @@ def walker(version, co, out=sys.stdout, showasm=False, showast=False): return walk if __name__ == '__main__': - def walk_test(co): + def deparse_test(co): sys_version = sys.version_info.major + (sys.version_info.minor / 10.0) - walker(sys_version, co, showasm=True, showast=True) - print() + deparsed = deparse_code(sys_version, co, showasm=False, showast=False) + print(deparsed.text) return - 
walk_test(walk_test.__code__) + deparse_test(deparse_test.__code__) diff --git a/uncompyle6/verify.py b/uncompyle6/verify.py index 3c804104..3e064ce3 100755 --- a/uncompyle6/verify.py +++ b/uncompyle6/verify.py @@ -14,6 +14,7 @@ import uncompyle6 import uncompyle6.scanner as scanner from uncompyle6 import PYTHON3 from uncompyle6.magics import PYTHON_MAGIC_INT +from uncompyle6.load import load_file, load_module # FIXME: DRY if PYTHON3: @@ -348,12 +349,12 @@ class Token(scanner.Token): def compare_code_with_srcfile(pyc_filename, src_filename): """Compare a .pyc with a source code file.""" - version, magic_int, code_obj1 = uncompyle6.load_module(pyc_filename) + version, magic_int, code_obj1 = load_module(pyc_filename) if magic_int != PYTHON_MAGIC_INT: msg = ("Can't compare code - Python is running with magic %s, but code is magic %s " % (PYTHON_MAGIC_INT, magic_int)) return msg - code_obj2 = uncompyle6._load_file(src_filename) + code_obj2 = load_file(src_filename) cmp_code_objects(version, code_obj1, code_obj2) return None