diff --git a/pytest/test_disasm.py-notyet b/pytest/test_disasm.py-notyet index e6b810c1..97ed847c 100644 --- a/pytest/test_disasm.py-notyet +++ b/pytest/test_disasm.py-notyet @@ -11,20 +11,14 @@ src_dir = get_srcdir() os.chdir(src_dir) -@pytest.mark.parametrize(("test_tuple", "function_to_test"), [ - ( - ('../test/bytecode_2.7/05_if.pyc', 'testdata/if-2.7.right',), - disassemble_file - ), - ( - ('../test/bytecode_2.7/05_ifelse.pyc', 'testdata/ifelse-2.7.right',), - disassemble_file - ), +@pytest.mark.parametrize(("test_tuple"), [ + ('../test/bytecode_2.7/05_if.pyc', 'testdata/if-2.7.right',), + ('../test/bytecode_2.7/05_ifelse.pyc', 'testdata/ifelse-2.7.right',), ]) -def test_funcoutput(capfd, test_tuple, function_to_test): +def test_funcoutput(capfd, test_tuple): - in_file , filename_expected = test_tuple - function_to_test(in_file, native=False) + in_file, filename_expected = test_tuple + disassemble_file(in_file) resout, reserr = capfd.readouterr() expected = open(filename_expected, "r").read() if resout != expected: diff --git a/requirements-dev.txt b/requirements-dev.txt index d5030482..586d311b 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,3 +1,3 @@ pytest>=3.0.0 flake8 -hypothesis +hypothesis<=3.8.3 diff --git a/test/Makefile b/test/Makefile index fe954485..ee2c971f 100644 --- a/test/Makefile +++ b/test/Makefile @@ -86,7 +86,7 @@ check-bytecode: check-bytecode-3 $(PYTHON) test_pythonlib.py \ --bytecode-2.1 --bytecode-2.2 --bytecode-2.3 --bytecode-2.4 \ --bytecode-2.5 --bytecode-2.6 --bytecode-2.7 \ - --bytecode-pypy2.7 --bytecode-1 + --bytecode-pypy2.7 #: Check deparsing bytecode 1.5 only @@ -177,10 +177,12 @@ grammar-coverage-3.5: #: Check deparsing Python 2.6 check-bytecode-2.6: $(PYTHON) test_pythonlib.py --bytecode-2.6 --weak-verify + $(PYTHON) test_pythonlib.py --bytecode-2.6-run --verify-run #: Check deparsing Python 2.7 check-bytecode-2.7: $(PYTHON) test_pythonlib.py --bytecode-2.7 --weak-verify + $(PYTHON) test_pythonlib.py --bytecode-2.7-run --verify-run #: Check deparsing Python 3.0 check-bytecode-3.0: @@ -197,22 +199,27 @@ check-bytecode-3.2: #: Check deparsing Python 3.3 check-bytecode-3.3: $(PYTHON) test_pythonlib.py --bytecode-3.3 --weak-verify + $(PYTHON) test_pythonlib.py --bytecode-3.3-run --verify-run #: Check deparsing Python 3.4 check-bytecode-3.4: $(PYTHON) test_pythonlib.py --bytecode-3.4 --weak-verify + $(PYTHON) test_pythonlib.py --bytecode-3.4-run --verify-run #: Check deparsing Python 3.5 check-bytecode-3.5: $(PYTHON) test_pythonlib.py --bytecode-3.5 --weak-verify + $(PYTHON) test_pythonlib.py --bytecode-3.5-run --verify-run #: Check deparsing Python 3.6 check-bytecode-3.6: $(PYTHON) test_pythonlib.py --bytecode-3.6 --weak-verify + $(PYTHON) test_pythonlib.py --bytecode-3.6-run --verify-run #: short tests for bytecodes only for this version of Python check-native-short: $(PYTHON) test_pythonlib.py --bytecode-$(PYTHON_VERSION) --weak-verify $(COMPILE) + $(PYTHON) test_pythonlib.py --bytecode-$(PYTHON_VERSION)-run --verify-run $(COMPILE) #: Run longer Python 2.6's lib files known to be okay check-2.4-ok: diff --git a/test/bytecode_2.4_run/README b/test/bytecode_2.4_run/README new file mode 100644 index 00000000..3fa5e010 --- /dev/null +++ b/test/bytecode_2.4_run/README @@ -0,0 +1,5 @@ +These are byte-compiled programs compiled by Python 2.4. + +Furthermore, the programs here are self-checking: when decompiled and +then run again in a 2.4 interpreter, they will give an error if they +are miscompiled.
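The "self-checking" idea in these README files is that each test program asserts its own expected behavior, so if uncompyle6 miscompiles it, recompiling and re-running the decompiled source exits with an error. As a hypothetical illustration (this exact file is not part of the change), a program placed under one of the bytecode_*_run directories could look like:

# Self-checking test program: any semantic change introduced by
# decompilation makes the assert fail, so the re-run exits non-zero.
def fib(n):
    if n < 2:
        return n
    return fib(n - 1) + fib(n - 2)

assert fib(10) == 55, "fib(10) should be 55"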
diff --git a/test/bytecode_2.5_run/README b/test/bytecode_2.5_run/README new file mode 100644 index 00000000..097cc5ba --- /dev/null +++ b/test/bytecode_2.5_run/README @@ -0,0 +1,5 @@ +These are byte-compiled programs compiled by Python 2.5. + +Furthermore, the programs here are self-checking: when decompiled and +then run again in a 2.5 interpreter, they will give an error if they +are miscompiled. diff --git a/test/bytecode_2.6_run/README b/test/bytecode_2.6_run/README new file mode 100644 index 00000000..a827636a --- /dev/null +++ b/test/bytecode_2.6_run/README @@ -0,0 +1,5 @@ +These are byte-compiled programs compiled by Python 2.6. + +Furthermore, the programs here are self-checking: when decompiled and +then run again in a 2.6 interpreter, they will give an error if they +are miscompiled. diff --git a/test/bytecode_2.7/02_ifelsetmtl.pyc b/test/bytecode_2.7/02_ifelsetmtl.pyc new file mode 100644 index 00000000..fd580645 Binary files /dev/null and b/test/bytecode_2.7/02_ifelsetmtl.pyc differ diff --git a/test/bytecode_2.7/01_float.pyc b/test/bytecode_2.7_run/01_float.pyc similarity index 100% rename from test/bytecode_2.7/01_float.pyc rename to test/bytecode_2.7_run/01_float.pyc diff --git a/test/bytecode_2.7_run/README b/test/bytecode_2.7_run/README new file mode 100644 index 00000000..50815908 --- /dev/null +++ b/test/bytecode_2.7_run/README @@ -0,0 +1,5 @@ +These are byte-compiled programs compiled by Python 2.7. + +Furthermore, the programs here are self-checking: when decompiled and +then run again in a 2.7 interpreter, they will give an error if they +are miscompiled. diff --git a/test/bytecode_3.0/README b/test/bytecode_3.0/README new file mode 100644 index 00000000..bb6f05ca --- /dev/null +++ b/test/bytecode_3.0/README @@ -0,0 +1 @@ +These are byte-compiled programs compiled by Python 3.0 diff --git a/test/bytecode_3.0_run/README b/test/bytecode_3.0_run/README new file mode 100644 index 00000000..c89d5d61 --- /dev/null +++ b/test/bytecode_3.0_run/README @@ -0,0 +1,5 @@ +These are byte-compiled programs compiled by Python 3.0. + +Furthermore, the programs here are self-checking: when decompiled and +then run again in a 3.0 interpreter, they will give an error if they +are miscompiled. diff --git a/test/bytecode_3.1/README b/test/bytecode_3.1/README new file mode 100644 index 00000000..cb4960b3 --- /dev/null +++ b/test/bytecode_3.1/README @@ -0,0 +1 @@ +These are byte-compiled programs compiled by Python 3.1 diff --git a/test/bytecode_3.1_run/README b/test/bytecode_3.1_run/README new file mode 100644 index 00000000..96ace639 --- /dev/null +++ b/test/bytecode_3.1_run/README @@ -0,0 +1,5 @@ +These are byte-compiled programs compiled by Python 3.1. + +Furthermore, the programs here are self-checking: when decompiled and +then run again in a 3.1 interpreter, they will give an error if they +are miscompiled. diff --git a/test/bytecode_3.2_run/README b/test/bytecode_3.2_run/README new file mode 100644 index 00000000..7f82a6f6 --- /dev/null +++ b/test/bytecode_3.2_run/README @@ -0,0 +1,5 @@ +These are byte-compiled programs compiled by Python 3.2. + +Furthermore, the programs here are self-checking: when decompiled and +then run again in a 3.2 interpreter, they will give an error if they +are miscompiled. diff --git a/test/bytecode_3.3_run/README b/test/bytecode_3.3_run/README new file mode 100644 index 00000000..68c21035 --- /dev/null +++ b/test/bytecode_3.3_run/README @@ -0,0 +1,5 @@ +These are byte-compiled programs compiled by Python 3.3.
+ +Furthermore, the programs here are self-checking: when decompiled and +then run again in a 3.3 interpreter, they will give an error if they +are miscompiled. diff --git a/test/bytecode_3.4_run/README b/test/bytecode_3.4_run/README new file mode 100644 index 00000000..e60f8376 --- /dev/null +++ b/test/bytecode_3.4_run/README @@ -0,0 +1,5 @@ +These are byte-compiled programs compiled by Python 3.4. + +Furthermore, the programs here are self-checking: when decompiled and +then run again in a 3.4 interpreter, they will give an error if they +are miscompiled. diff --git a/test/bytecode_3.5_run/README b/test/bytecode_3.5_run/README new file mode 100644 index 00000000..a5f9700b --- /dev/null +++ b/test/bytecode_3.5_run/README @@ -0,0 +1,5 @@ +These are byte-compiled programs compiled by Python 3.5. + +Furthermore, the programs here are self-checking: when decompiled and +then run again in a 3.5 interpreter, they will give an error if they +are miscompiled. diff --git a/test/simple_source/bug27+/02_ifelsetmtl.py b/test/simple_source/bug27+/02_ifelsetmtl.py new file mode 100644 index 00000000..72fb520b --- /dev/null +++ b/test/simple_source/bug27+/02_ifelsetmtl.py @@ -0,0 +1,11 @@ +# Issue #148 on 2.7 +# Bug is in handling CONTINUE like JUMP_BACK +# Similar code is probably found in a 2.7 stdlib. mapurl? +def reduce_url(url): + atoms = [] + for atom in url: + if atom == '.': + pass # JUMP_BACK is patched as CONTINUE here + elif atom == '..': + atoms.push() + return atoms diff --git a/test/stdlib/runtests.sh b/test/stdlib/runtests.sh index 222e310d..375cdaf8 100755 --- a/test/stdlib/runtests.sh +++ b/test/stdlib/runtests.sh @@ -60,6 +60,7 @@ case $PYVERSION in ;; 2.7) SKIP_TESTS=( + [test_curses.py]=1 # Possibly fails on its own but not detected [test_dis.py]=1 # We change line numbers - duh! [test_doctest.py]=1 [test_grammar.py]=1 # Too many stmts.
Handle large stmts diff --git a/test/test_pyenvlib.py b/test/test_pyenvlib.py index 583d0fd8..c0fbda41 100755 --- a/test/test_pyenvlib.py +++ b/test/test_pyenvlib.py @@ -128,17 +128,19 @@ if __name__ == '__main__': test_options_keys = list(test_options.keys()) test_options_keys.sort() opts, args = getopt.getopt(sys.argv[1:], '', - ['start-with=', 'verify', 'weak-verify', + ['start-with=', 'verify', 'verify-run', 'weak-verify', 'max=', 'coverage', 'all', ] \ + test_options_keys ) vers = '' for opt, val in opts: if opt == '--verify': - do_verify = True - if opt == '--weak-verify': + do_verify = 'strong' + elif opt == '--weak-verify': do_verify = 'weak' - if opt == '--coverage': + elif opt == '--verify-run': + do_verify = 'verify-run' + elif opt == '--coverage': do_coverage = True elif opt == '--start-with': start_with = val diff --git a/test/test_pythonlib.py b/test/test_pythonlib.py index 3cdbb6d2..17cfd5d2 100755 --- a/test/test_pythonlib.py +++ b/test/test_pythonlib.py @@ -83,6 +83,9 @@ for vers in (1.5, bytecode = "bytecode_%s" % vers key = "bytecode-%s" % vers test_options[key] = (bytecode, PYC, bytecode, vers) + bytecode = "bytecode_%s_run" % vers + key = "bytecode-%s-run" % vers + test_options[key] = (bytecode, PYC, bytecode, vers) key = "%s" % vers pythonlib = "python%s" % vers if isinstance(vers, float) and vers >= 3.0: @@ -189,8 +192,9 @@ if __name__ == '__main__': test_options_keys = list(test_options.keys()) test_options_keys.sort() opts, args = getopt.getopt(sys.argv[1:], '', - ['start-with=', 'verify', 'weak-verify', 'all', 'compile', - 'coverage', + ['start-with=', 'verify', 'verify-run', + 'weak-verify', 'all', + 'compile', 'coverage', 'no-rm'] \ + test_options_keys ) if not opts: help() @@ -205,9 +209,11 @@ if __name__ == '__main__': for opt, val in opts: if opt == '--verify': - test_opts['do_verify'] = True + test_opts['do_verify'] = 'strong' elif opt == '--weak-verify': test_opts['do_verify'] = 'weak' + elif opt == '--verify-run': + test_opts['do_verify'] = 'verify-run' elif opt == '--compile': test_opts['do_compile'] = True elif opt == '--start-with': diff --git a/uncompyle6/bin/uncompile.py b/uncompyle6/bin/uncompile.py index b2c41765..cebb760e 100755 --- a/uncompyle6/bin/uncompile.py +++ b/uncompyle6/bin/uncompile.py @@ -34,6 +34,8 @@ Options: -p use number of processes -r recurse directories looking for .pyc and .pyo files --verify compare generated source with input byte-code + --verify-run compile generated source, run it and check exit code + --weak-verify compile generated source --linemaps generated line number correspondencies between byte-code and generated source output --help show this message @@ -57,7 +59,7 @@ from uncompyle6.version import VERSION def usage(): print("""usage: - %s [--verify] [--asm] [--tree] [--grammar] [-o ] FILE|DIR... + %s [--verify | --weak-verify ] [--asm] [--tree] [--grammar] [-o ] FILE|DIR... 
%s [--help | -h | --version | -V] """ % (program, program)) sys.exit(1) @@ -82,7 +84,7 @@ def main_bin(): try: opts, files = getopt.getopt(sys.argv[1:], 'hagtdrVo:c:p:', 'help asm grammar linemaps recurse timestamp tree ' - 'verify version showgrammar'.split(' ')) + 'verify verify-run version showgrammar'.split(' ')) except getopt.GetoptError, e: sys.stderr.write('%s: %s\n' % (os.path.basename(sys.argv[0]), e)) sys.exit(-1) @@ -96,15 +98,19 @@ def main_bin(): print("%s %s" % (program, VERSION)) sys.exit(0) elif opt == '--verify': - options['do_verify'] = True + options['do_verify'] = 'strong' + elif opt == '--weak-verify': + options['do_verify'] = 'weak' + elif opt == '--verify-run': + options['do_verify'] = 'verify-run' elif opt == '--linemaps': options['do_linemaps'] = True elif opt in ('--asm', '-a'): options['showasm'] = 'after' - options['do_verify'] = False + options['do_verify'] = None elif opt in ('--tree', '-t'): options['showast'] = True - options['do_verify'] = False + options['do_verify'] = None elif opt in ('--grammar', '-g'): options['showgrammar'] = True elif opt == '-o': @@ -161,7 +167,7 @@ def main_bin(): result = main(src_base, out_base, files, codes, outfile, **options) if len(files) > 1: - mess = status_msg(do_verify, *result) + mess = status_msg(do_verify, result, do_verify) print('# ' + mess) pass except (KeyboardInterrupt): diff --git a/uncompyle6/disas.py b/uncompyle6/disas.py index bb358989..87b9084f 100644 --- a/uncompyle6/disas.py +++ b/uncompyle6/disas.py @@ -1,4 +1,4 @@ -# Copyright (c) 2015-2016 by Rocky Bernstein +# Copyright (c) 2015-2016, 2018 by Rocky Bernstein # Copyright (c) 2005 by Dan Pascu # Copyright (c) 2000-2002 by hartmut Goebel # Copyright (c) 1999 John Aycock @@ -13,7 +13,7 @@ that is doing the extraction. Second, we need structured instruction information for the (de)-parsing step. Python 3.4 and up provides this, but we still do -want to run on Python 2.7. +want to run on earlier Python versions. """ import sys @@ -21,7 +21,6 @@ from collections import deque import uncompyle6 -from xdis.main import disassemble_file as xdisassemble_file from xdis.code import iscode from xdis.load import check_object_path, load_module from uncompyle6.scanner import get_scanner @@ -61,17 +60,13 @@ def disco_loop(disasm, queue, real_out): pass pass -def disassemble_file(filename, outstream=None, native=False): +def disassemble_file(filename, outstream=None): """ disassemble Python byte-code file (.pyc) If given a Python source file (".py") file, we'll try to find the corresponding compiled object.
""" - if native: - xdisassemble_file(filename, outstream) - return - filename = check_object_path(filename) (version, timestamp, magic_int, co, is_pypy, source_size) = load_module(filename) @@ -86,14 +81,14 @@ def _test(): """Simple test program to disassemble a file.""" argc = len(sys.argv) if argc != 2: - if argc == 1 and uncompyle6.PYTHON3: + if argc == 1: fn = __file__ else: sys.stderr.write("usage: %s [-|CPython compiled file]\n" % __file__) sys.exit(2) else: fn = sys.argv[1] - disassemble_file(fn, native=True) + disassemble_file(fn) if __name__ == "__main__": _test() diff --git a/uncompyle6/main.py b/uncompyle6/main.py index e2ed7795..8027d511 100644 --- a/uncompyle6/main.py +++ b/uncompyle6/main.py @@ -1,6 +1,6 @@ import datetime, os, subprocess, sys, tempfile -from uncompyle6 import verify, IS_PYPY +from uncompyle6 import verify, IS_PYPY, PYTHON_VERSION from xdis.code import iscode from uncompyle6.disas import check_object_path from uncompyle6.semantics import pysource @@ -8,19 +8,41 @@ from uncompyle6.parser import ParserError from uncompyle6.version import VERSION from uncompyle6.linenumbers import line_number_mapping +from uncompyle6.semantics.pysource import deparse_code +from uncompyle6.semantics.linemap import deparse_code_with_map + from xdis.load import load_module +def _get_outstream(outfile): + dir = os.path.dirname(outfile) + failed_file = outfile + '_failed' + if os.path.exists(failed_file): + os.remove(failed_file) + try: + os.makedirs(dir) + except OSError: + pass + return open(outfile, 'w') + def decompile( bytecode_version, co, out=None, showasm=None, showast=False, timestamp=None, showgrammar=False, code_objects={}, - source_size=None, is_pypy=False, magic_int=None): + source_size=None, is_pypy=False, magic_int=None, + mapstream=None): """ ingests and deparses a given code block 'co' - """ - assert iscode(co) + Caller is responsible for closing `out` and `mapstream` + """ # store final output stream for case of error real_out = out or sys.stdout + + def write(s): + s += '\n' + real_out.write(s) + + assert iscode(co) + if is_pypy: co_pypy_str = 'PyPy ' else: @@ -35,51 +57,75 @@ def decompile( m = str(magic_int) else: m = "" - real_out.write('# uncompyle6 version %s\n' - '# %sPython bytecode %s%s\n# Decompiled from: %sPython %s\n' % + + sys_version_lines = sys.version.split('\n') + write('# uncompyle6 version %s\n' + '# %sPython bytecode %s%s\n# Decompiled from: %sPython %s' % (VERSION, co_pypy_str, bytecode_version, - " (%s)" % m, run_pypy_str, - '\n# '.join(sys.version.split('\n')))) + " (%s)" % m, run_pypy_str, + '\n# '.join(sys_version_lines))) if co.co_filename: - real_out.write('# Embedded file name: %s\n' % co.co_filename) + write('# Embedded file name: %s' % co.co_filename,) if timestamp: - real_out.write('# Compiled at: %s\n' % - datetime.datetime.fromtimestamp(timestamp)) + write('# Compiled at: %s' % + datetime.datetime.fromtimestamp(timestamp)) if source_size: - real_out.write('# Size of source mod 2**32: %d bytes\n' % source_size) + real_out.write('# Size of source mod 2**32: %d bytes\n' % + source_size) - pysource.deparse_code(bytecode_version, co, out, showasm, showast, - showgrammar, code_objects=code_objects, - is_pypy=is_pypy) + try: + if mapstream: + if isinstance(mapstream, str): + mapstream = _get_outstream(mapstream) -# For compatiblity -uncompyle = decompile + deparsed = deparse_code_with_map(bytecode_version, co, out, showasm, showast, + showgrammar, + code_objects = code_objects, + is_pypy = is_pypy, + ) + header_count = 
3+len(sys_version_lines) + linemap = [(line_no, deparsed.source_linemap[line_no]+header_count) + for line_no in + sorted(deparsed.source_linemap.keys())] + mapstream.write("\n\n# %s\n" % linemap) + else: + deparsed = deparse_code(bytecode_version, co, out, showasm, showast, + showgrammar, code_objects=code_objects, + is_pypy=is_pypy) + pass + return deparsed + except pysource.SourceWalkerError, e: + # deparsing failed + raise pysource.SourceWalkerError(str(e)) def decompile_file(filename, outstream=None, showasm=None, showast=False, - showgrammar=False): + showgrammar=False, mapstream=None): """ - decompile Python byte-code file (.pyc) + decompile Python byte-code file (.pyc). Return a list of + all the deparsed objects found in `filename`. """ filename = check_object_path(filename) code_objects = {} (version, timestamp, magic_int, co, is_pypy, - source_size) = load_module(filename, code_objects) + source_size) = load_module(filename, code_objects) - if type(co) == list: + if isinstance(co, list): + deparsed = [] for con in co: - decompile(version, con, outstream, showasm, showast, - timestamp, showgrammar, code_objects=code_objects, - is_pypy=is_pypy, magic_int=magic_int) + deparsed.append( + decompile(version, con, outstream, showasm, showast, + timestamp, showgrammar, code_objects=code_objects, + is_pypy=is_pypy, magic_int=magic_int, + mapstream=mapstream)) else: - decompile(version, co, outstream, showasm, showast, - timestamp, showgrammar, - code_objects=code_objects, source_size=source_size, - is_pypy=is_pypy, magic_int=magic_int) + deparsed = [decompile(version, co, outstream, showasm, showast, + timestamp, showgrammar, + code_objects=code_objects, source_size=source_size, + is_pypy=is_pypy, magic_int=magic_int, + mapstream=mapstream)] co = None - -# For compatiblity -uncompyle_file = decompile_file + return deparsed # FIXME: combine into an options parameter @@ -98,19 +144,9 @@ def main(in_base, out_base, files, codes, outfile=None, - files below out_base out_base=...
- stdout out_base=None, outfile=None """ - def _get_outstream(outfile): - dir = os.path.dirname(outfile) - failed_file = outfile + '_failed' - if os.path.exists(failed_file): - os.remove(failed_file) - try: - os.makedirs(dir) - except OSError: - pass - return open(outfile, 'w') - tot_files = okay_files = failed_files = verify_failed_files = 0 current_outfile = outfile + linemap_stream = None for filename in files: infile = os.path.join(in_base, filename) @@ -119,25 +155,31 @@ def main(in_base, out_base, files, codes, outfile=None, % infile) continue + if do_linemaps: + linemap_stream = infile + '.pymap' + pass + # print (infile, file=sys.stderr) if outfile: # outfile was given as parameter outstream = _get_outstream(outfile) elif out_base is None: outstream = sys.stdout - if do_linemaps or do_verify: - prefix = os.path.basename(filename) + if do_linemaps: + linemap_stream = sys.stdout + if do_verify: + prefix = os.path.basename(filename) + '-' if prefix.endswith('.py'): prefix = prefix[:-len('.py')] - junk, outfile = tempfile.mkstemp(suffix=".py", - prefix=prefix) + # Unbuffer output if possible if sys.stdout.isatty(): buffering = -1 else: buffering = 0 sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', buffering) - tee = subprocess.Popen(["tee", outfile], stdin=subprocess.PIPE) + tee = subprocess.Popen(["tee", current_outfile], + stdin=subprocess.PIPE) os.dup2(tee.stdin.fileno(), sys.stdout.fileno()) os.dup2(tee.stdin.fileno(), sys.stderr.fileno()) else: @@ -145,12 +187,16 @@ def main(in_base, out_base, files, codes, outfile=None, current_outfile = os.path.join(out_base, filename[0:-1]) else: current_outfile = os.path.join(out_base, filename) + '_dis' + pass + pass + outstream = _get_outstream(current_outfile) + # print(current_outfile, file=sys.stderr) # Try to uncompile the input file try: - decompile_file(infile, outstream, showasm, showast, showgrammar) + decompile_file(infile, outstream, showasm, showast, showgrammar, linemap_stream) tot_files += 1 except (ValueError, SyntaxError, ParserError, pysource.SourceWalkerError): sys.stdout.write("\n") @@ -173,19 +219,13 @@ def main(in_base, out_base, files, codes, outfile=None, # sys.stderr.write("\n# Can't uncompile %s\n" % infile) else: # uncompile successful if current_outfile: - if do_linemaps: - mapping = line_number_mapping(infile, current_outfile) - outstream.write("\n\n## Line number correspondences\n") - import pprint - s = pprint.pformat(mapping, indent=2, width=80) - s2 = '##' + '\n##'.join(s.split("\n")) + "\n" - outstream.write(s2) outstream.close() if do_verify: - weak_verify = do_verify == 'weak' try: - msg = verify.compare_code_with_srcfile(infile, current_outfile, weak_verify=weak_verify) + msg = verify.compare_code_with_srcfile(infile, + current_outfile, + do_verify) if not current_outfile: if not msg: print '\n# okay decompiling %s' % infile @@ -204,7 +244,7 @@ def main(in_base, out_base, files, codes, outfile=None, sys.stderr.write("### Error Verifying %s\n" % filename) sys.stderr.write(str(e) + "\n") if not outfile: - sys.stder.write("### Error Verifiying %s" % + sys.stderr.write("### Error Verifiying %s" % filename) sys.stderr.write(e) if raise_on_error: @@ -251,19 +291,21 @@ else: def status_msg(do_verify, tot_files, okay_files, failed_files, verify_failed_files, weak_verify): if weak_verify == 'weak': - verification_type = 'weak' + verification_type = 'weak ' + elif weak_verify == 'verify-run': + verification_type = 'run ' else: - verification_type = 'strong' + verification_type = '' if tot_files == 1: if failed_files: 
return "\n# decompile failed" elif verify_failed_files: - return "\n# decompile %s verification failed" % verification_type + return "\n# decompile %sverification failed" % verification_type else: return "\n# Successfully decompiled file" pass pass mess = "decompiled %i files: %i okay, %i failed" % (tot_files, okay_files, failed_files) if do_verify: - mess += (", %i %s verification failed" % (verify_failed_files, verification_type)) + mess += (", %i %sverification failed" % (verify_failed_files, verification_type)) return mess diff --git a/uncompyle6/parsers/parse27.py b/uncompyle6/parsers/parse27.py index 287c8575..b25dee14 100644 --- a/uncompyle6/parsers/parse27.py +++ b/uncompyle6/parsers/parse27.py @@ -131,6 +131,7 @@ class Python27Parser(Python2Parser): ifelsestmt ::= testexpr c_stmts_opt JUMP_FORWARD else_suite COME_FROM ifelsestmtc ::= testexpr c_stmts_opt JUMP_ABSOLUTE else_suitec ifelsestmtl ::= testexpr c_stmts_opt JUMP_BACK else_suitel + ifelsestmtl ::= testexpr c_stmts_opt CONTINUE else_suitel # Common with 2.6 return_if_lambda ::= RETURN_END_IF_LAMBDA COME_FROM diff --git a/uncompyle6/scanners/scanner27.py b/uncompyle6/scanners/scanner27.py index bcd93635..417b2f31 100755 --- a/uncompyle6/scanners/scanner27.py +++ b/uncompyle6/scanners/scanner27.py @@ -1,4 +1,4 @@ -# Copyright (c) 2015-2017 by Rocky Bernstein +# Copyright (c) 2015-2018 by Rocky Bernstein """ Python 2.7 bytecode ingester. diff --git a/uncompyle6/semantics/aligner.py b/uncompyle6/semantics/aligner.py index cc43470c..14e1462e 100644 --- a/uncompyle6/semantics/aligner.py +++ b/uncompyle6/semantics/aligner.py @@ -3,7 +3,7 @@ from uncompyle6.semantics.pysource import ( SourceWalker, SourceWalkerError, find_globals, ASSIGN_DOC_STRING, RETURN_NONE) from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG class AligningWalker(SourceWalker, object): - def __init__(self, version, scanner, out, showast=False, + def __init__(self, version, out, scanner, showast=False, debug_parser=PARSER_DEFAULT_DEBUG, compile_mode='exec', is_pypy=False): SourceWalker.__init__(self, version, out, scanner, showast, debug_parser, diff --git a/uncompyle6/semantics/customize.py b/uncompyle6/semantics/customize.py new file mode 100644 index 00000000..385cd370 --- /dev/null +++ b/uncompyle6/semantics/customize.py @@ -0,0 +1,589 @@ +# Copyright (c) 2018 by Rocky Bernstein + +"""Isolate Python version-specific semantic actions here. 
+""" + +from uncompyle6.semantics.consts import ( + INDENT_PER_LEVEL, TABLE_R, TABLE_DIRECT) + +from uncompyle6.semantics.make_function import ( + make_function3_annotate, + ) + +from xdis.util import COMPILER_FLAG_BIT +from xdis.code import iscode +from uncompyle6.parsers.astnode import AST +from uncompyle6.scanners.tok import Token +from uncompyle6.semantics.helper import flatten_list + +def customize_for_version(self, is_pypy, version): + if is_pypy: + ######################## + # PyPy changes + ####################### + TABLE_DIRECT.update({ + 'assert_pypy': ( '%|assert %c\n' , 1 ), + 'assert2_pypy': ( '%|assert %c, %c\n' , 1, 4 ), + 'try_except_pypy': ( '%|try:\n%+%c%-%c\n\n', 1, 2 ), + 'tryfinallystmt_pypy': ( '%|try:\n%+%c%-%|finally:\n%+%c%-\n\n', 1, 3 ), + 'assign3_pypy': ( '%|%c, %c, %c = %c, %c, %c\n', 5, 4, 3, 0, 1, 2 ), + 'assign2_pypy': ( '%|%c, %c = %c, %c\n', 3, 2, 0, 1), + }) + else: + ######################## + # Without PyPy + ####################### + TABLE_DIRECT.update({ + 'assert': ( '%|assert %c\n' , 0 ), + 'assert2': ( '%|assert %c, %c\n' , 0, 3 ), + 'try_except': ( '%|try:\n%+%c%-%c\n\n', 1, 3 ), + 'assign2': ( '%|%c, %c = %c, %c\n', 3, 4, 0, 1 ), + 'assign3': ( '%|%c, %c, %c = %c, %c, %c\n', 5, 6, 7, 0, 1, 2 ), + }) + if version < 3.0: + TABLE_R.update({ + 'STORE_SLICE+0': ( '%c[:]', 0 ), + 'STORE_SLICE+1': ( '%c[%p:]', 0, (1, 100) ), + 'STORE_SLICE+2': ( '%c[:%p]', 0, (1, 100) ), + 'STORE_SLICE+3': ( '%c[%p:%p]', 0, (1, 100), (2, 100) ), + 'DELETE_SLICE+0': ( '%|del %c[:]\n', 0 ), + 'DELETE_SLICE+1': ( '%|del %c[%c:]\n', 0, 1 ), + 'DELETE_SLICE+2': ( '%|del %c[:%c]\n', 0, 1 ), + 'DELETE_SLICE+3': ( '%|del %c[%c:%c]\n', 0, 1, 2 ), + }) + TABLE_DIRECT.update({ + 'raise_stmt2': ( '%|raise %c, %c\n', 0, 1), + }) + + # exec as a built-in statement is only in Python 2.x + def n_exec_stmt(self, node): + """ + exec_stmt ::= expr exprlist DUP_TOP EXEC_STMT + exec_stmt ::= expr exprlist EXEC_STMT + """ + self.write(self.indent, 'exec ') + self.preorder(node[0]) + if not node[1][0].isNone(): + sep = ' in ' + for subnode in node[1]: + self.write(sep); sep = ", " + self.preorder(subnode) + self.println() + self.prune() # stop recursing + self.n_exec_smt = n_exec_stmt + + else: + TABLE_DIRECT.update({ + # Gotta love Python for its futzing around with syntax like this + 'raise_stmt2': ( '%|raise %c from %c\n', 0, 1), + }) + + if version >= 3.2: + TABLE_DIRECT.update({ + 'del_deref_stmt': ( '%|del %c\n', 0), + 'DELETE_DEREF': ( '%{pattr}', 0 ), + }) + + if version <= 2.4: + TABLE_DIRECT.update({ + 'importmultiple': ( '%|import %c%c\n', 2, 3), + 'import_cont' : ( ', %c', 2), + 'tryfinallystmt': ( '%|try:\n%+%c%-%|finally:\n%+%c%-', + (1, 'suite_stmts_opt') , + (5, 'suite_stmts_opt') ) + }) + if version == 2.3: + TABLE_DIRECT.update({ + 'if1_stmt': ( '%|if 1\n%+%c%-', 5 ) + }) + + global NAME_MODULE + NAME_MODULE = AST('stmt', + [ AST('assign', + [ AST('expr', + [Token('LOAD_GLOBAL', pattr='__name__', + offset=0, has_arg=True)]), + AST('store', + [ Token('STORE_NAME', pattr='__module__', + offset=3, has_arg=True)]) + ])]) + pass + if version <= 2.3: + if version <= 2.1: + TABLE_DIRECT.update({ + 'importmultiple': ( '%c', 2 ), + # FIXME: not quite right. We have indiividual imports + # when there is in fact one: "import a, b, ..." 
+ 'imports_cont': ( '%C%,', (1, 100, '\n') ), + }) + pass + pass + pass + elif version >= 2.5: + ######################## + # Import style for 2.5+ + ######################## + TABLE_DIRECT.update({ + 'importmultiple': ( '%|import %c%c\n', 2, 3 ), + 'import_cont' : ( ', %c', 2 ), + # With/as is allowed as "from future" thing in 2.5 + # Note: It is safe to put the variables after "as" in parenthesis, + # and sometimes it is needed. + 'withstmt': ( '%|with %c:\n%+%c%-', 0, 3), + 'withasstmt': ( '%|with %c as (%c):\n%+%c%-', 0, 2, 3), + }) + + # In 2.5+ "except" handlers and the "finally" can appear in one + # "try" statement. So the below has the effect of combining the + # "tryfinally" with statement with the "try_except" statement + def tryfinallystmt(node): + if len(node[1][0]) == 1 and node[1][0][0] == 'stmt': + if node[1][0][0][0] == 'try_except': + node[1][0][0][0].kind = 'tf_try_except' + if node[1][0][0][0] == 'tryelsestmt': + node[1][0][0][0].kind = 'tf_tryelsestmt' + self.default(node) + self.n_tryfinallystmt = tryfinallystmt + + ######################################## + # Python 2.6+ + # except as + # vs. older: + # except , + # + # For 2.6 we use the older syntax which + # matches how we parse this in bytecode + ######################################## + if version > 2.6: + TABLE_DIRECT.update({ + 'except_cond2': ( '%|except %c as %c:\n', 1, 5 ), + }) + else: + TABLE_DIRECT.update({ + 'except_cond3': ( '%|except %c, %c:\n', 1, 6 ), + 'testtrue_then': ( 'not %p', (0, 22) ), + + }) + + if 2.4 <= version <= 2.6: + TABLE_DIRECT.update({ + 'comp_for': ( ' for %c in %c', 3, 1 ), + }) + else: + TABLE_DIRECT.update({ + 'comp_for': ( ' for %c in %c%c', 2, 0, 3 ), + }) + + if version >= 3.0: + TABLE_DIRECT.update({ + 'function_def_annotate': ( '\n\n%|def %c%c\n', -1, 0), + 'store_locals': ( '%|# inspect.currentframe().f_locals = __locals__\n', ), + }) + + def n_mkfunc_annotate(node): + + if self.version >= 3.3 or node[-2] == 'kwargs': + # LOAD_CONST code object .. + # LOAD_CONST 'x0' if >= 3.3 + # EXTENDED_ARG + # MAKE_FUNCTION .. + code = node[-4] + elif node[-3] == 'expr': + code = node[-3][0] + else: + # LOAD_CONST code object .. + # MAKE_FUNCTION .. + code = node[-3] + + self.indent_more() + for annotate_last in range(len(node)-1, -1, -1): + if node[annotate_last] == 'annotate_tuple': + break + + # FIXME: the real situation is that when derived from + # function_def_annotate we the name has been filled in. + # But when derived from funcdefdeco it hasn't Would like a better + # way to distinquish. 
+ if self.f.getvalue()[-4:] == 'def ': + self.write(code.attr.co_name) + + # FIXME: handle and pass full annotate args + make_function3_annotate(self, node, is_lambda=False, + codeNode=code, annotate_last=annotate_last) + + if len(self.param_stack) > 1: + self.write('\n\n') + else: + self.write('\n\n\n') + self.indent_less() + self.prune() # stop recursing + self.n_mkfunc_annotate = n_mkfunc_annotate + + if version >= 3.4: + ######################## + # Python 3.4+ Additions + ####################### + TABLE_DIRECT.update({ + 'LOAD_CLASSDEREF': ( '%{pattr}', ), + }) + ######################## + # Python 3.5+ Additions + ####################### + if version >= 3.5: + TABLE_DIRECT.update({ + 'await_expr': ( 'await %c', 0), + 'await_stmt': ( '%|%c\n', 0), + 'async_for_stmt': ( + '%|async for %c in %c:\n%+%c%-\n\n', 9, 1, 25 ), + 'async_forelse_stmt': ( + '%|async for %c in %c:\n%+%c%-%|else:\n%+%c%-\n\n', 9, 1, 25, 28 ), + 'async_with_stmt': ( + '%|async with %c:\n%+%c%-', 0, 7), + 'async_with_as_stmt': ( + '%|async with %c as %c:\n%+%c%-', 0, 6, 7), + 'unmap_dict': ( '{**%C}', (0, -1, ', **') ), + # 'unmapexpr': ( '{**%c}', 0), # done by n_unmapexpr + + }) + + def async_call(node): + self.f.write('async ') + node.kind == 'call' + p = self.prec + self.prec = 80 + self.template_engine(('%c(%P)', 0, + (1, -4, ', ', 100)), node) + self.prec = p + node.kind == 'async_call' + self.prune() + self.n_async_call = async_call + self.n_build_list_unpack = self.n_list + + if version == 3.5: + def n_call(node): + mapping = self._get_mapping(node) + table = mapping[0] + key = node + for i in mapping[1:]: + key = key[i] + pass + if key.kind.startswith('CALL_FUNCTION_VAR_KW'): + # Python 3.5 changes the stack position of *args. kwargs come + # after *args whereas in earlier Pythons, *args is at the end + # which simpilfiies things from our perspective. + # Python 3.6+ replaces CALL_FUNCTION_VAR_KW with CALL_FUNCTION_EX + # We will just swap the order to make it look like earlier Python 3. 
+ entry = table[key.kind] + kwarg_pos = entry[2][1] + args_pos = kwarg_pos - 1 + # Put last node[args_pos] after subsequent kwargs + while node[kwarg_pos] == 'kwarg' and kwarg_pos < len(node): + # swap node[args_pos] with node[kwargs_pos] + node[kwarg_pos], node[args_pos] = node[args_pos], node[kwarg_pos] + args_pos = kwarg_pos + kwarg_pos += 1 + self.default(node) + self.n_call = n_call + + def n_function_def(node): + if self.version == 3.6: + code_node = node[0][0] + else: + code_node = node[0][1] + + is_code = hasattr(code_node, 'attr') and iscode(code_node.attr) + if (is_code and + (code_node.attr.co_flags & COMPILER_FLAG_BIT['COROUTINE'])): + self.template_engine(('\n\n%|async def %c\n', + -2), node) + else: + self.template_engine(('\n\n%|def %c\n', -2), + node) + self.prune() + self.n_function_def = n_function_def + + def unmapexpr(node): + last_n = node[0][-1] + for n in node[0]: + self.preorder(n) + if n != last_n: + self.f.write(', **') + pass + pass + self.prune() + pass + self.n_unmapexpr = unmapexpr + + if version >= 3.6: + ######################## + # Python 3.6+ Additions + ####################### + + TABLE_DIRECT.update({ + 'fstring_expr': ( "{%c%{conversion}}", 0), + 'fstring_single': ( "f'{%c%{conversion}}'", 0), + 'fstring_multi': ( "f'%c'", 0), + 'func_args36': ( "%c(**", 0), + 'try_except36': ( '%|try:\n%+%c%-%c\n\n', 1, 2 ), + 'unpack_list': ( '*%c', (0, 'list') ), + 'starred': ( '*%c', (0, 'expr') ), + 'call_ex' : ( + '%c(%c)', + (0, 'expr'), 1), + 'call_ex_kw' : ( + '%c(%c)', + (0, 'expr'), 2), + + }) + + TABLE_R.update({ + 'CALL_FUNCTION_EX': ('%c(*%P)', 0, (1, 2, ', ', 100)), + # Not quite right + 'CALL_FUNCTION_EX_KW': ('%c(**%C)', 0, (2, 3, ',')), + }) + + def build_unpack_tuple_with_call(node): + + if node[0] == 'expr': + tup = node[0][0] + else: + tup = node[0] + pass + assert tup == 'tuple' + self.call36_tuple(tup) + + buwc = node[-1] + assert buwc.kind.startswith('BUILD_TUPLE_UNPACK_WITH_CALL') + for n in node[1:-1]: + self.f.write(', *') + self.preorder(n) + pass + self.prune() + return + self.n_build_tuple_unpack_with_call = build_unpack_tuple_with_call + + def build_unpack_map_with_call(node): + n = node[0] + if n == 'expr': + n = n[0] + if n == 'dict': + self.call36_dict(n) + first = 1 + sep = ', **' + else: + first = 0 + sep = '**' + for n in node[first:-1]: + self.f.write(sep) + self.preorder(n) + sep = ', **' + pass + self.prune() + return + self.n_build_map_unpack_with_call = build_unpack_map_with_call + + def call_ex_kw2(node): + """Handle CALL_FUNCTION_EX 2 (have KW) but with + BUILD_{MAP,TUPLE}_UNPACK_WITH_CALL""" + + # This is weird shit. Thanks Python! 
+ self.preorder(node[0]) + self.write('(') + + assert node[1] == 'build_tuple_unpack_with_call' + btuwc = node[1] + tup = btuwc[0] + if tup == 'expr': + tup = tup[0] + assert tup == 'tuple' + self.call36_tuple(tup) + assert node[2] == 'build_map_unpack_with_call' + + self.write(', ') + d = node[2][0] + if d == 'expr': + d = d[0] + assert d == 'dict' + self.call36_dict(d) + + args = btuwc[1] + self.write(', *') + self.preorder(args) + + self.write(', **') + star_star_args = node[2][1] + if star_star_args == 'expr': + star_star_args = star_star_args[0] + self.preorder(star_star_args) + self.write(')') + self.prune() + self.n_call_ex_kw2 = call_ex_kw2 + + def call_ex_kw3(node): + """Handle CALL_FUNCTION_EX 2 (have KW) but without + BUILD_{MAP,TUPLE}_UNPACK_WITH_CALL""" + self.preorder(node[0]) + self.write('(') + args = node[1][0] + if args == 'tuple': + if self.call36_tuple(args) > 0: + self.write(', ') + pass + pass + else: + self.write('*') + self.preorder(args) + self.write(', ') + pass + + kwargs = node[2] + if kwargs == 'expr': + kwargs = kwargs[0] + self.write('**') + self.preorder(kwargs) + self.write(')') + self.prune() + self.n_call_ex_kw3 = call_ex_kw3 + + + def call36_tuple(node): + """ + A tuple used in a call, these are like normal tuples but they + don't have the enclosing parenthesis. + """ + assert node == 'tuple' + # Note: don't iterate over last element which is a + # BUILD_TUPLE... + flat_elems = flatten_list(node[:-1]) + + self.indent_more(INDENT_PER_LEVEL) + sep = '' + + for elem in flat_elems: + if elem in ('ROT_THREE', 'EXTENDED_ARG'): + continue + assert elem == 'expr' + line_number = self.line_number + value = self.traverse(elem) + if line_number != self.line_number: + sep += '\n' + self.indent + INDENT_PER_LEVEL[:-1] + self.write(sep, value) + sep = ', ' + + self.indent_less(INDENT_PER_LEVEL) + return len(flat_elems) + self.call36_tuple = call36_tuple + + def call36_dict(node): + """ + A dict used in a call_ex_kw2, which are a dictionary items expressed + in a call. This should format to: + a=1, b=2 + In other words, no braces, no quotes around keys and ":" becomes + "=". + + We will source-code use line breaks to guide us when to break. 
+ """ + p = self.prec + self.prec = 100 + + self.indent_more(INDENT_PER_LEVEL) + sep = INDENT_PER_LEVEL[:-1] + line_number = self.line_number + + assert node[0].kind.startswith('kvlist') + # Python 3.5+ style key/value list in dict + kv_node = node[0] + l = list(kv_node) + i = 0 + # Respect line breaks from source + while i < len(l): + self.write(sep) + name = self.traverse(l[i], indent='') + # Strip off beginning and trailing quotes in name + name = name[1:-1] + if i > 0: + line_number = self.indent_if_source_nl(line_number, + self.indent + INDENT_PER_LEVEL[:-1]) + line_number = self.line_number + self.write(name, '=') + value = self.traverse(l[i+1], indent=self.indent+(len(name)+2)*' ') + self.write(value) + sep = "," + if line_number != self.line_number: + sep += "\n" + self.indent + INDENT_PER_LEVEL[:-1] + line_number = self.line_number + i += 2 + pass + self.prec = p + self.indent_less(INDENT_PER_LEVEL) + return + self.call36_dict = call36_dict + + + FSTRING_CONVERSION_MAP = {1: '!s', 2: '!r', 3: '!a'} + + def f_conversion(node): + node.conversion = FSTRING_CONVERSION_MAP.get(node.data[1].attr, '') + + def fstring_expr(node): + f_conversion(node) + self.default(node) + self.n_fstring_expr = fstring_expr + + def fstring_single(node): + f_conversion(node) + self.default(node) + self.n_fstring_single = fstring_single + + # def kwargs_only_36(node): + # keys = node[-1].attr + # num_kwargs = len(keys) + # values = node[:num_kwargs] + # for i, (key, value) in enumerate(zip(keys, values)): + # self.write(key + '=') + # self.preorder(value) + # if i < num_kwargs: + # self.write(',') + # self.prune() + # return + # self.n_kwargs_only_36 = kwargs_only_36 + + def kwargs_36(node): + self.write('(') + keys = node[-1].attr + num_kwargs = len(keys) + num_posargs = len(node) - (num_kwargs + 1) + n = len(node) + assert n >= len(keys)+2 + sep = '' + # FIXME: adjust output for line breaks? + for i in range(num_posargs): + self.write(sep) + self.preorder(node[i]) + sep = ', ' + + i = num_posargs + j = 0 + # FIXME: adjust output for line breaks? 
+ while i < n-1: + self.write(sep) + self.write(keys[j] + '=') + self.preorder(node[i]) + i += 1 + j += 1 + self.write(')') + self.prune() + return + self.n_kwargs_36 = kwargs_36 + + + def return_closure(node): + # Nothing should be output here + self.prune() + return + self.n_return_closure = return_closure + pass # version > 3.6 + pass # version > 3.4 + pass # version > 3.0 + return diff --git a/uncompyle6/semantics/fragments.py b/uncompyle6/semantics/fragments.py index 10db8b34..1bd0b2d0 100644 --- a/uncompyle6/semantics/fragments.py +++ b/uncompyle6/semantics/fragments.py @@ -1,4 +1,4 @@ -# Copyright (c) 2015-2017 by Rocky Bernstein +# Copyright (c) 2015-2018 by Rocky Bernstein # Copyright (c) 1999 John Aycock """ @@ -93,7 +93,10 @@ TABLE_DIRECT_FRAGMENT = { 'raise_stmt0': ( '%|%rraise\n', ), 'import': ( '%|import %c%x\n', 2, (2, (0, 1)), ), 'importfrom': ( '%|from %[2]{pattr}%x import %c\n', (2, (0, 1)), 3), + + # FIXME only in <= 2.4 'importmultiple': ( '%|import%b %c%c\n', 0, 2, 3 ), + 'list_for': (' for %c%x in %c%c', 2, (2, (1, )), 0, 3 ), 'forelsestmt': ( '%|for %c%x in %c:\n%+%c%-%|else:\n%+%c%-\n\n', 3, (3, (2,)), 1, 4, -2), @@ -197,7 +200,7 @@ class FragmentsWalker(pysource.SourceWalker, object): n_aug_assign_1 = n_print_item = exec_stmt = print_to_item = del_stmt = table_r_node n_classdefco1 = n_classdefco2 = except_cond1 = except_cond2 = table_r_node - def n_passtmt(self, node): + def n_pass(self, node): start = len(self.f.getvalue()) + len(self.indent) self.set_pos_info(node, start, start+len("pass")) self.default(node) @@ -257,11 +260,12 @@ class FragmentsWalker(pysource.SourceWalker, object): def n_yield(self, node): start = len(self.f.getvalue()) - self.write('yield') + try: + super(FragmentsWalker, self).n_yield(node) + except GenericASTTraversalPruningException: + pass if node != AST('yield', [NONE, Token('YIELD_VALUE')]): - self.write(' ') node[0].parent = node - self.preorder(node[0]) self.set_pos_info(node[-1], start, len(self.f.getvalue())) self.set_pos_info(node, start, len(self.f.getvalue())) self.prune() # stop recursing @@ -269,41 +273,29 @@ class FragmentsWalker(pysource.SourceWalker, object): # In Python 3.3+ only def n_yield_from(self, node): start = len(self.f.getvalue()) - self.write('yield from') - self.write(' ') - node[0].parent = node + try: + super(FragmentsWalker, self).n_yield(node) + except GenericASTTraversalPruningException: + pass self.preorder(node[0]) self.set_pos_info(node, start, len(self.f.getvalue())) self.prune() # stop recursing def n_buildslice3(self, node): start = len(self.f.getvalue()) - p = self.prec - self.prec = 100 - if node[0] != NONE: - self.preorder(node[0]) - self.write(':') - if node[1] != NONE: - self.preorder(node[1]) - self.write(':') - if node[2] != NONE: - self.preorder(node[2]) - self.prec = p + try: + super(FragmentsWalker, self).n_buildslice3(node) + except GenericASTTraversalPruningException: + pass self.set_pos_info(node, start, len(self.f.getvalue())) self.prune() # stop recursing def n_buildslice2(self, node): start = len(self.f.getvalue()) - p = self.prec - self.prec = 100 - if node[0] != NONE: - node[0].parent = node - self.preorder(node[0]) - self.write(':') - if node[1] != NONE: - node[1].parent = node - self.preorder(node[1]) - self.prec = p + try: + super(FragmentsWalker, self).n_buildslice2(node) + except GenericASTTraversalPruningException: + pass self.set_pos_info(node, start, len(self.f.getvalue())) self.prune() # stop recursing @@ -341,54 +333,27 @@ class FragmentsWalker(pysource.SourceWalker, object): 
def n_ret_expr(self, node): start = len(self.f.getvalue()) - if len(node) == 1 and node[0] == 'expr': - node[0].parent = node - self.n_expr(node[0]) - else: - self.n_expr(node) + super(FragmentsWalker, self).n_ret_expr(node) self.set_pos_info(node, start, len(self.f.getvalue())) def n_binary_expr(self, node): start = len(self.f.getvalue()) node[0].parent = node self.last_finish = len(self.f.getvalue()) - self.preorder(node[0]) - self.write(' ') - node[-1].parent = node - self.preorder(node[-1]) - self.write(' ') - self.prec -= 1 - node[1].parent = node - self.preorder(node[1]) - self.prec += 1 + try: + super(FragmentsWalker, self).n_binary_expr(node) + except GenericASTTraversalPruningException: + pass self.set_pos_info(node, start, len(self.f.getvalue())) self.prune() def n_LOAD_CONST(self, node): start = len(self.f.getvalue()) - data = node.pattr; datatype = type(data) - if isinstance(data, float) and str(data) in frozenset(['nan', '-nan', 'inf', '-inf']): - # float values 'nan' and 'inf' are not directly representable in Python at least - # before 3.5 and even there it is via a library constant. - # So we will canonicalize their representation as float('nan') and float('inf') - self.write("float('%s')" % data) - elif isinstance(datatype, int) and data == minint: - # convert to hex, since decimal representation - # would result in 'LOAD_CONST; UNARY_NEGATIVE' - # change:hG/2002-02-07: this was done for all negative integers - # todo: check whether this is necessary in Python 2.1 - self.write( hex(data) ) - elif datatype is type(Ellipsis): - self.write('...') - elif data is None: - # LOAD_CONST 'None' only occurs, when None is - # implicit eg. in 'return' w/o params - # pass - self.write('None') - else: - self.write(repr(data)) + try: + super(FragmentsWalker, self).n_LOAD_CONST(node) + except GenericASTTraversalPruningException: + pass self.set_pos_info(node, start, len(self.f.getvalue())) - # LOAD_CONST is a terminal, so stop processing/recursing early self.prune() def n_exec_stmt(self, node): @@ -397,16 +362,12 @@ class FragmentsWalker(pysource.SourceWalker, object): exec_stmt ::= expr exprlist EXEC_STMT """ start = len(self.f.getvalue()) + len(self.indent) - self.write(self.indent, 'exec ') - self.preorder(node[0]) - if node[1][0] != NONE: - sep = ' in ' - for subnode in node[1]: - self.write(sep); sep = ", " - self.preorder(subnode) + try: + super(FragmentsWalker, self).n_exec_stmt(node) + except GenericASTTraversalPruningException: + pass self.set_pos_info(node, start, len(self.f.getvalue())) self.set_pos_info(node[-1], start, len(self.f.getvalue())) - self.println() self.prune() # stop recursing def n_ifelsestmtr(self, node): @@ -1626,6 +1587,12 @@ class FragmentsWalker(pysource.SourceWalker, object): elif typ == '{': d = node.__dict__ expr = m.group('expr') + + # Line mapping stuff + if (hasattr(node, 'linestart') and node.linestart + and hasattr(node, 'current_line_number')): + self.source_linemap[self.current_line_number] = node.linestart + # Additional fragment-position stuff try: start = len(self.f.getvalue()) self.write(eval(expr, d, d)) @@ -1659,7 +1626,7 @@ class FragmentsWalker(pysource.SourceWalker, object): pass def deparse_code(version, co, out=StringIO(), showasm=False, showast=False, - showgrammar=False, is_pypy=False): + showgrammar=False, is_pypy=False, walker=FragmentsWalker): """ Convert the code object co into a python source fragment. @@ -1699,7 +1666,8 @@ def deparse_code(version, co, out=StringIO(), showasm=False, showast=False, # Build AST from disassembly. 
# deparsed = pysource.FragmentsWalker(out, scanner, showast=showast) - deparsed = FragmentsWalker(version, scanner, showast=showast, debug_parser=debug_parser) + deparsed = walker(version, scanner, showast=showast, + debug_parser=debug_parser) deparsed.ast = deparsed.build_ast(tokens, customize) diff --git a/uncompyle6/semantics/linemap.py b/uncompyle6/semantics/linemap.py new file mode 100644 index 00000000..4eae0643 --- /dev/null +++ b/uncompyle6/semantics/linemap.py @@ -0,0 +1,87 @@ +# Copyright (c) 2018 by Rocky Bernstein +from uncompyle6.semantics.pysource import SourceWalker, deparse_code +import uncompyle6.semantics.fragments as fragments + +# FIXME: does this handle nested code, and lambda properly +class LineMapWalker(SourceWalker): + def __init__(self, *args, **kwargs): + super(LineMapWalker, self).__init__(*args, **kwargs) + self.source_linemap = {} + self.current_line_number = 1 + + def write(self, *data): + """Augment write routine to keep track of current line""" + for l in data: + ## print("XXX write: '%s'" % l) + for i in str(l): + if i == '\n': + self.current_line_number += 1 + pass + pass + pass + return super(LineMapWalker, self).write(*data) + + # Note n_expr needs treatment too + + def default(self, node): + """Augment write default routine to record line number changes""" + if hasattr(node, 'linestart'): + if node.linestart: + self.source_linemap[self.current_line_number] = node.linestart + return super(LineMapWalker, self).default(node) + + def n_LOAD_CONST(self, node): + if hasattr(node, 'linestart'): + if node.linestart: + self.source_linemap[self.current_line_number] = node.linestart + return super(LineMapWalker, self).n_LOAD_CONST(node) + + +class LineMapFragmentWalker(fragments.FragmentsWalker, LineMapWalker): + def __init__(self, *args, **kwargs): + super(LineMapFragmentWalker, self).__init__(*args, **kwargs) + self.source_linemap = {} + self.current_line_number = 0 + +def deparse_code_with_map(*args, **kwargs): + """ + Like deparse_code but saves line number correspondences. + """ + kwargs['walker'] = LineMapWalker + return deparse_code(*args, **kwargs) + +def deparse_code_with_fragments_and_map(*args, **kwargs): + """ + Like deparse_code_with_map but saves fragments. 
+ """ + kwargs['walker'] = LineMapFragmentWalker + return fragments.deparse_code(*args, **kwargs) + +if __name__ == '__main__': + def deparse_test(co): + "This is a docstring" + import sys + sys_version = float(sys.version[0:3]) + # deparsed = deparse_code(sys_version, co, showasm=True, showast=True) + deparsed = deparse_code_with_map(sys_version, co, showasm=False, + showast=False, + showgrammar=False) + a = 1; b = 2 + print("\n") + linemap = [(line_no, deparsed.source_linemap[line_no]) + for line_no in + sorted(deparsed.source_linemap.keys())] + print(linemap) + deparsed = deparse_code_with_fragments_and_map(sys_version, + co, showasm=False, + showast=False, + showgrammar=False) + a = 1; b = 2 + print("\n") + linemap2 = [(line_no, deparsed.source_linemap[line_no]) + for line_no in + sorted(deparsed.source_linemap.keys())] + print(linemap2) + assert linemap == linemap2 + return + deparse_test(deparse_test.__code__) diff --git a/uncompyle6/semantics/pysource.py b/uncompyle6/semantics/pysource.py index 47fa481f..5f212350 100644 --- a/uncompyle6/semantics/pysource.py +++ b/uncompyle6/semantics/pysource.py @@ -126,6 +126,7 @@ from uncompyle6.semantics.make_function import ( ) from uncompyle6.semantics.parser_error import ParserError from uncompyle6.semantics.check_ast import checker +from uncompyle6.semantics.customize import customize_for_version from uncompyle6.semantics.helper import ( print_docstring, find_globals, flatten_list) from uncompyle6.scanners.tok import Token @@ -211,7 +212,7 @@ class SourceWalker(GenericASTTraversal, object): self.classes = [] self.pending_newlines = 0 self.linestarts = linestarts - self.line_number = 0 + self.line_number = 1 self.ast_errors = [] # FIXME: have p.insts update in a better way # modularity is broken here @@ -233,7 +234,7 @@ class SourceWalker(GenericASTTraversal, object): self.name = None self.version = version self.is_pypy = is_pypy - self.customize_for_version(is_pypy, version) + customize_for_version(self, is_pypy, version) return @@ -320,7 +321,6 @@ class SourceWalker(GenericASTTraversal, object): if version <= 2.1: TABLE_DIRECT.update({ 'importmultiple': ( '%c', 2 ), - 'imports_cont': ( '%c', 2 ), # FIXME: not quite right. We have indiividual imports # when there is in fact one: "import a, b, ..." 'imports_cont': ( '%C%,', (1, 100, '\n') ), @@ -997,6 +997,10 @@ class SourceWalker(GenericASTTraversal, object): else: n = node[0] + # if (hasattr(n, 'linestart') and n.linestart and + # hasattr(self, 'current_line_number')): + # self.source_linemap[self.current_line_number] = n.linestart + self.prec = PRECEDENCE.get(n.kind, -2) if n == 'LOAD_CONST' and repr(n.pattr)[0] == '-': self.prec = 6 @@ -1087,15 +1091,11 @@ class SourceWalker(GenericASTTraversal, object): # xxx' -> b'xxx' if not PYTHON3 and isinstance(data, unicode): try: - try: - data = str(data) - except UnicodeEncodeError: - # Have to keep data as it is: in Unicode. - pass - self.write(repr(data)) - except: - from trepan.api import debug; debug() - self.write(repr(data)) + data = str(data) + except UnicodeEncodeError: + # Have to keep data as it is: in Unicode. + pass + self.write(repr(data)) elif isinstance(data, str): self.write('b'+repr(data)) else: @@ -1113,6 +1113,10 @@ class SourceWalker(GenericASTTraversal, object): n_store_subscr = n_subscript = n_delete_subscr + # Note: this node is only in Python 2.x + # FIXME: figure out how to get this into customization + # put so that we can get access via super from + # the fragments routine. 
def n_exec_stmt(self, node): """ exec_stmt ::= expr exprlist DUP_TOP EXEC_STMT @@ -2296,6 +2300,12 @@ class SourceWalker(GenericASTTraversal, object): elif typ == '{': d = node.__dict__ expr = m.group('expr') + + # Line mapping stuff + if (hasattr(node, 'linestart') and node.linestart + and hasattr(node, 'current_line_number')): + self.source_linemap[self.current_line_number] = node.linestart + try: self.write(eval(expr, d, d)) except: @@ -2595,7 +2605,8 @@ class SourceWalker(GenericASTTraversal, object): def deparse_code(version, co, out=sys.stdout, showasm=None, showast=False, - showgrammar=False, code_objects={}, compile_mode='exec', is_pypy=False): + showgrammar=False, code_objects={}, compile_mode='exec', + is_pypy=False, walker=SourceWalker): """ ingests and deparses a given code block 'co' """ @@ -2614,10 +2625,9 @@ def deparse_code(version, co, out=sys.stdout, showasm=None, showast=False, # Build AST from disassembly. linestarts = dict(scanner.opc.findlinestarts(co)) - deparsed = SourceWalker(version, out, scanner, showast=showast, - debug_parser=debug_parser, compile_mode=compile_mode, - is_pypy=is_pypy, - linestarts=linestarts) + deparsed = walker(version, out, scanner, showast=showast, + debug_parser=debug_parser, compile_mode=compile_mode, + is_pypy=is_pypy, linestarts=linestarts) isTopLevel = co.co_name == '' deparsed.ast = deparsed.build_ast(tokens, customize, isTopLevel=isTopLevel) diff --git a/uncompyle6/verify.py b/uncompyle6/verify.py index 480c8962..c27c9ec2 100755 --- a/uncompyle6/verify.py +++ b/uncompyle6/verify.py @@ -1,16 +1,17 @@ # +# (C) Copyright 2015-2018 by Rocky Bernstein # (C) Copyright 2000-2002 by hartmut Goebel -# (C) Copyright 2015-2017 by Rocky Bernstein # """ byte-code verification """ -import operator +import operator, sys import xdis.std as dis +from subprocess import call import uncompyle6 -import uncompyle6.scanner as scanner +from uncompyle6.scanner import (Token as ScannerToken, get_scanner) from uncompyle6 import PYTHON3 from xdis.code import iscode from xdis.magics import PYTHON_MAGIC_INT @@ -132,8 +133,8 @@ class CmpErrorMember(VerifyCmpError): # these members are ignored __IGNORE_CODE_MEMBERS__ = ['co_filename', 'co_firstlineno', 'co_lnotab', 'co_stacksize', 'co_names'] -def cmp_code_objects(version, is_pypy, code_obj1, code_obj2, - name='', ignore_code=False): +def cmp_code_objects(version, is_pypy, code_obj1, code_obj2, verify, + name=''): """ Compare two code-objects. 
@@ -178,53 +179,12 @@ def cmp_code_objects(version, is_pypy, code_obj1, code_obj2, tokens1 = None for member in members: - if member in __IGNORE_CODE_MEMBERS__ or ignore_code: + if member in __IGNORE_CODE_MEMBERS__ or verify != 'verify': pass - elif member == 'co_code' and not ignore_code: - if version == 2.3: - import uncompyle6.scanners.scanner23 as scan - scanner = scan.Scanner23(show_asm=False) - elif version == 2.4: - import uncompyle6.scanners.scanner24 as scan - scanner = scan.Scanner24(show_asm=False) - elif version == 2.5: - import uncompyle6.scanners.scanner25 as scan - scanner = scan.Scanner25(show_asm=False) - elif version == 2.6: - import uncompyle6.scanners.scanner26 as scan - scanner = scan.Scanner26(show_asm=False) - elif version == 2.7: - if is_pypy: - import uncompyle6.scanners.pypy27 as scan - scanner = scan.ScannerPyPy27(show_asm=False) - else: - import uncompyle6.scanners.scanner27 as scan - scanner = scan.Scanner27() - elif version == 3.0: - import uncompyle6.scanners.scanner30 as scan - scanner = scan.Scanner30() - elif version == 3.1: - import uncompyle6.scanners.scanner32 as scan - scanner = scan.Scanner32() - elif version == 3.2: - if is_pypy: - import uncompyle6.scanners.pypy32 as scan - scanner = scan.ScannerPyPy32() - else: - import uncompyle6.scanners.scanner32 as scan - scanner = scan.Scanner32() - elif version == 3.3: - import uncompyle6.scanners.scanner33 as scan - scanner = scan.Scanner33() - elif version == 3.4: - import uncompyle6.scanners.scanner34 as scan - scanner = scan.Scanner34() - elif version == 3.5: - import uncompyle6.scanners.scanner35 as scan - scanner = scan.Scanner35() - elif version == 3.6: - import uncompyle6.scanners.scanner36 as scan - scanner = scan.Scanner36() + elif member == 'co_code': + if verify != 'strong': + continue + scanner = get_scanner(version, is_pypy, show_asm=False) global JUMP_OPS JUMP_OPS = list(scan.JUMP_OPS) + ['JUMP_BACK'] @@ -365,7 +325,8 @@ def cmp_code_objects(version, is_pypy, code_obj1, code_obj2, codes2 = ( c for c in code_obj2.co_consts if hasattr(c, 'co_consts') ) for c1, c2 in zip(codes1, codes2): - cmp_code_objects(version, is_pypy, c1, c2, name=name) + cmp_code_objects(version, is_pypy, c1, c2, verify, + name=name) elif member == 'co_flags': flags1 = code_obj1.co_flags flags2 = code_obj2.co_flags @@ -390,7 +351,7 @@ def cmp_code_objects(version, is_pypy, code_obj1, code_obj2, getattr(code_obj1, member), getattr(code_obj2, member)) -class Token(scanner.Token): +class Token(ScannerToken): """Token class with changed semantics for 'cmp()'.""" def __cmp__(self, o): t = self.kind # shortcut @@ -416,8 +377,10 @@ class Token(scanner.Token): def __str__(self): return '%s\t%-17s %r' % (self.offset, self.kind, self.pattr) -def compare_code_with_srcfile(pyc_filename, src_filename, weak_verify=False): - """Compare a .pyc with a source code file.""" +def compare_code_with_srcfile(pyc_filename, src_filename, verify): + """Compare a .pyc with a source code file. If everything is okay, None + is returned. Otherwise a string message describing the mismatch is returned. 
+ """ (version, timestamp, magic_int, code_obj1, is_pypy, source_size) = load_module(pyc_filename) if magic_int != PYTHON_MAGIC_INT: @@ -431,17 +394,27 @@ def compare_code_with_srcfile(pyc_filename, src_filename, weak_verify=False): if version == 2.4: print(pyc_filename) return str(e).replace(src_filename, pyc_filename) - cmp_code_objects(version, is_pypy, code_obj1, code_obj2, ignore_code=weak_verify) + cmp_code_objects(version, is_pypy, code_obj1, code_obj2, verify) + if verify == 'verify-run': + try: + retcode = call("%s %s" % (sys.executable, src_filename), shell=True) + if retcode != 0: + return "Child was terminated by signal %d" % retcode + pass + except OSError, e: + return "Execution failed: %s" % e + pass return None -def compare_files(pyc_filename1, pyc_filename2, weak_verify=False): +def compare_files(pyc_filename1, pyc_filename2, verify): """Compare two .pyc files.""" (version1, timestamp, magic_int1, code_obj1, is_pypy, source_size) = uncompyle6.load_module(pyc_filename1) (version2, timestamp, magic_int2, code_obj2, is_pypy, - source_size) = uncompyle6.load_module(pyc_filename2) - weak_verify = weak_verify or (magic_int1 != magic_int2) - cmp_code_objects(version1, is_pypy, code_obj1, code_obj2, ignore_code=weak_verify) + source_size) = uncompyle6.load_module(pyc_filename2) + if (magic_int1 != magic_int2) and verify == 'verify': + verify = 'weak_verify' + cmp_code_objects(version1, is_pypy, code_obj1, code_obj2, verify) if __name__ == '__main__': t1 = Token('LOAD_CONST', None, 'code_object _expandLang', 52)