diff --git a/.circleci/config.yml b/.circleci/config.yml index 800965bd..08b024e7 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -27,9 +27,9 @@ jobs: # VM instead of a container) see https://circleci.com/docs/2.0/executor-types/ # To see the list of pre-built images that CircleCI provides for most common languages see # https://circleci.com/docs/2.0/circleci-images/ - docker: - - image: circleci/build-image:ubuntu-14.04-XXL-upstart-1189-5614f37 - command: /sbin/init + machine: + python: + version: 2.7.14 steps: # Machine Setup # If you break your build into multiple jobs with workflows, you will probably want to do the parts of this that are relevant in each @@ -48,17 +48,17 @@ jobs: # Restore the dependency cache - restore_cache: keys: - # This branch if available - - v1-dep-{{ .Branch }}- - # Default branch if not - - v1-dep-master- - # Any branch if there are none on the default branch - this should be unnecessary if you have your default branch configured correctly - - v1-dep- + - v2-dependencies-{{ .Branch }}- + # fallback to using the latest cache if no exact match is found + - v2-dependencies- + # This is based on your 1.0 configuration file or project settings - - run: easy_install spark_parser==1.8.5 && easy_install xdis==3.8.4 + - run: pip install -e . + - run: pip install -r requirements-dev.txt + # Save dependency cache - save_cache: - key: v1-dep-{{ .Branch }}-{{ epoch }} + key: v2-dependencies-{{ .Branch }}-{{ epoch }} paths: # This is a broad list of cache paths to include many possible development environments # You can probably delete some of these entries @@ -67,9 +67,8 @@ jobs: - ~/.m2 - ~/.ivy2 - ~/.bundle - - ~/.go_workspace - - ~/.gradle - ~/.cache/bower + # Test # This would typically be a build job when using workflows, possibly combined with build # This is based on your 1.0 configuration file or project settings diff --git a/NEWS.md b/NEWS.md index b0cd0057..eacbd64c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,22 @@ +3.4.0 2019-08-24 Totoro +======================= + +The main change is to add a tree-transformation phase. This simplifies the +code a little and allows us to turn `if ...: raise AssertionError` into +`assert`, and many `if ..: else if ...` into `if ... elif ..` + +Use options `--show=before` and `--show=after` to see the before the tree transformation phase and after the tree transformation phase. + +Most of the heavy lifting for this was done by x0ret. + +Other changes: + +- Fix issue #275, #283 (process to fix this bug is documented on wiki), #284 +- blacken more code +- CircleCI adjustments for a changing CircleCi +- Require more recent `xdis` for Python 3.8 +- Fix bugs in code using `BUILD_LIST_UNPACK` and variants + 3.3.5 2019-07-03 Pre Independence Day ===================================== diff --git a/__pkginfo__.py b/__pkginfo__.py index 69929d1f..44a342f6 100644 --- a/__pkginfo__.py +++ b/__pkginfo__.py @@ -26,46 +26,46 @@ copyright = """ Copyright (C) 2015-2019 Rocky Bernstein . """ -classifiers = ['Development Status :: 5 - Production/Stable', - 'Intended Audience :: Developers', - 'License :: OSI Approved :: GNU General Public License v3 (GPLv3)', - 'Operating System :: OS Independent', - 'Programming Language :: Python', - 'Programming Language :: Python :: 2.4', - 'Programming Language :: Python :: 2.5', - 'Programming Language :: Python :: 2.6', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3.0', - 'Programming Language :: Python :: 3.1', - 'Programming Language :: Python :: 3.2', - 'Programming Language :: Python :: 3.3', - 'Programming Language :: Python :: 3.4', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - 'Topic :: Software Development :: Debuggers', - 'Topic :: Software Development :: Libraries :: Python Modules', +classifiers = ["Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", + "Operating System :: OS Independent", + "Programming Language :: Python", + "Programming Language :: Python :: 2.4", + "Programming Language :: Python :: 2.5", + "Programming Language :: Python :: 2.6", + "Programming Language :: Python :: 2.7", + "Programming Language :: Python :: 3.0", + "Programming Language :: Python :: 3.1", + "Programming Language :: Python :: 3.2", + "Programming Language :: Python :: 3.3", + "Programming Language :: Python :: 3.4", + "Programming Language :: Python :: 3.5", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Topic :: Software Development :: Debuggers", + "Topic :: Software Development :: Libraries :: Python Modules", ] # The rest in alphabetic order author = "Rocky Bernstein, Hartmut Goebel, John Aycock, and others" author_email = "rb@dustyfeet.com" entry_points = { - 'console_scripts': [ - 'uncompyle6=uncompyle6.bin.uncompile:main_bin', - 'pydisassemble=uncompyle6.bin.pydisassemble:main', + "console_scripts": [ + "uncompyle6=uncompyle6.bin.uncompile:main_bin", + "pydisassemble=uncompyle6.bin.pydisassemble:main", ]} ftp_url = None -install_requires = ['spark-parser >= 1.8.9, < 1.9.0', - 'xdis >= 4.0.3, < 4.1.0'] +install_requires = ["spark-parser >= 1.8.9, < 1.9.0", + "xdis >= 4.0.3, < 4.1.0"] -license = 'GPL3' -mailing_list = 'python-debugger@googlegroups.com' -modname = 'uncompyle6' +license = "GPL3" +mailing_list = "python-debugger@googlegroups.com" +modname = "uncompyle6" py_modules = None -short_desc = 'Python cross-version byte-code decompiler' -web = 'https://github.com/rocky/python-uncompyle6/' +short_desc = "Python cross-version byte-code decompiler" +web = "https://github.com/rocky/python-uncompyle6/" # tracebacks in zip files are funky and not debuggable zip_safe = True @@ -82,5 +82,5 @@ def read(*rnames): return open(os.path.join(srcdir, *rnames)).read() # Get info from files; set: long_description and VERSION -long_description = ( read("README.rst") + '\n' ) -exec(read('uncompyle6/version.py')) +long_description = ( read("README.rst") + "\n" ) +exec(read("uncompyle6/version.py")) diff --git a/pytest/validate.py b/pytest/validate.py index 84e6e4b2..29707e8e 100644 --- a/pytest/validate.py +++ b/pytest/validate.py @@ -1,16 +1,20 @@ # future from __future__ import print_function + # std import os import difflib import subprocess import tempfile import functools + # uncompyle6 / xdis from uncompyle6 import PYTHON_VERSION, PYTHON3, IS_PYPY, code_deparse + # TODO : I think we can get xdis to support the dis api (python 3 version) by doing something like this there from xdis.bytecode import Bytecode from xdis.main import get_opcode + opc = get_opcode(PYTHON_VERSION, IS_PYPY) Bytecode = functools.partial(Bytecode, opc=opc) import six @@ -20,6 +24,7 @@ if PYTHON3: else: from StringIO import StringIO + def _dis_to_text(co): return Bytecode(co).dis() @@ -33,36 +38,32 @@ def print_diff(original, uncompyled): :param original: Text describing the original code object. :param uncompyled: Text describing the uncompyled code object. """ - original_lines = original.split('\n') - uncompyled_lines = uncompyled.split('\n') - args = original_lines, uncompyled_lines, 'original', 'uncompyled' + original_lines = original.split("\n") + uncompyled_lines = uncompyled.split("\n") + args = original_lines, uncompyled_lines, "original", "uncompyled" try: from bs4 import BeautifulSoup + diff = difflib.HtmlDiff().make_file(*args) diff = BeautifulSoup(diff, "html.parser") diff.select_one('table[summary="Legends"]').extract() except ImportError: - print('\nTo display diff highlighting run:\n pip install BeautifulSoup4') + print("\nTo display diff highlighting run:\n pip install BeautifulSoup4") diff = difflib.HtmlDiff().make_table(*args) with tempfile.NamedTemporaryFile(delete=False) as f: - f.write(str(diff).encode('utf-8')) + f.write(str(diff).encode("utf-8")) try: print() - html = subprocess.check_output([ - 'elinks', - '-dump', - '-no-references', - '-dump-color-mode', - '1', - f.name, - ]).decode('utf-8') + html = subprocess.check_output( + ["elinks", "-dump", "-no-references", "-dump-color-mode", "1", f.name] + ).decode("utf-8") print(html) except: - print('\nFor side by side diff install elinks') + print("\nFor side by side diff install elinks") diff = difflib.Differ().compare(original_lines, uncompyled_lines) - print('\n'.join(diff)) + print("\n".join(diff)) finally: os.unlink(f.name) @@ -80,18 +81,19 @@ def are_instructions_equal(i1, i2): :return: True if the two instructions are approximately equal, otherwise False. """ - result = (1 == 1 + result = ( + 1 == 1 and i1.opname == i2.opname and i1.opcode == i2.opcode and i1.arg == i2.arg # ignore differences due to code objects # TODO : Better way of ignoring address - and (i1.argval == i2.argval or '', mode) + original_code = compile(text, "", mode) original_dis = _dis_to_text(original_code) original_text = text - deparsed = code_deparse(original_code, - out=six.StringIO(), - version=PYTHON_VERSION, - compile_mode=mode) + deparsed = code_deparse( + original_code, out=six.StringIO(), version=PYTHON_VERSION, compile_mode=mode + ) uncompyled_text = deparsed.text - uncompyled_code = compile(uncompyled_text, '', 'exec') + uncompyled_code = compile(uncompyled_text, "", "exec") if not are_code_objects_equal(uncompyled_code, original_code): @@ -138,15 +139,17 @@ def validate_uncompyle(text, mode='exec'): def output(text, dis): width = 60 - return '\n\n'.join([ - ' SOURCE CODE '.center(width, '#'), - text.strip(), - ' BYTECODE '.center(width, '#'), - dis - ]) + return "\n\n".join( + [ + " SOURCE CODE ".center(width, "#"), + text.strip(), + " BYTECODE ".center(width, "#"), + dis, + ] + ) original = output(original_text, original_dis) uncompyled = output(uncompyled_text, uncompyled_dis) print_diff(original, uncompyled) - assert 'original' == 'uncompyled' + assert "original" == "uncompyled" diff --git a/requirements-dev.txt b/requirements-dev.txt index b97df148..77c74ff3 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,2 +1,4 @@ flake8 hypothesis<=3.0.0 +six +pytest==3.2.5 diff --git a/test/simple_source/bug26/00_generator.py b/test/simple_source/bug26/00_generator.py deleted file mode 100644 index 12a4cfad..00000000 --- a/test/simple_source/bug26/00_generator.py +++ /dev/null @@ -1,7 +0,0 @@ -# Issue #283 in Python 2.6 -# See https://github.com/rocky/python-uncompyle6/issues/283 - -# This code is RUNNABLE! - -G = ( c for c in "spam, Spam, SPAM!" if c > 'A' and c < 'S') -assert list(G) == ["P", "M"] diff --git a/test/stdlib/runtests.sh b/test/stdlib/runtests.sh index 122bc4d4..9189dfae 100755 --- a/test/stdlib/runtests.sh +++ b/test/stdlib/runtests.sh @@ -56,18 +56,46 @@ case $PYVERSION in ;; 2.6) SKIP_TESTS=( + [test_aepack.py]=1 + [test_aifc.py]=1 + [test_array.py]=1 + [test_audioop.py]=1 + [test_base64.py]=1 + [test_bigmem.py]=1 + [test_binascii.py]=1 + [test_builtin.py]=1 + [test_bytes.py]=1 + [test_class.py]=1 + [test_codeccallbacks.py]=1 + [test_codecencodings_cn.py]=1 + [test_codecencodings_hk.py]=1 + [test_codecencodings_jp.py]=1 + [test_codecencodings_kr.py]=1 + [test_codecencodings_tw.py]=1 + [test_codecencodings_cn.py]=1 + [test_codecmaps_hk.py]=1 + [test_codecmaps_jp.py]=1 + [test_codecmaps_kr.py]=1 + [test_codecmaps_tw.py]=1 + [test_codecs.py]=1 [test_compile.py]=1 # Intermittent - sometimes works and sometimes doesn't - [test_grammar.py]=1 # Need real flow control. "and" in side "or" - # "and" inside ifelse need to simulatenously work + [test_cookielib.py]=1 + [test_copy.py]=1 + [test_decimal.py]=1 + [test_descr.py]=1 # Problem in pickle.py? + [test_exceptions.py]=1 + [test_extcall.py]=1 + [test_float.py]=1 + [test_future4.py]=1 + [test_generators.py]=1 [test_grp.py]=1 # Long test - might work Control flow? [test_opcodes.py]=1 [test_pwd.py]=1 # Long test - might work? Control flow? [test_re.py]=1 # Probably Control flow? [test_queue.py]=1 # Control flow? - [test_strftime.py]=1 [test_trace.py]=1 # Line numbers are expected to be different [test_zipfile64.py]=1 # Skip Long test - [test_zlib.py]=1 # Look at + [test_zlib.py]=1 # Takes too long to run (more than 3 minutes 39 seconds) # .pyenv/versions/2.6.9/lib/python2.6/lib2to3/refactor.pyc # .pyenv/versions/2.6.9/lib/python2.6/pyclbr.pyc # .pyenv/versions/2.6.9/lib/python2.6/quopri.pyc -- look at ishex, is short @@ -103,9 +131,11 @@ case $PYVERSION in [test_httplib.py]=1 # Ok, but POWER has problems with this [test_pdb.py]=1 # Ok, but POWER has problems with this + [test_capi.py]=1 [test_curses.py]=1 # Possibly fails on its own but not detected [test_dis.py]=1 # We change line numbers - duh! [test_doctest.py]=1 # Fails on its own + [test_exceptions.py]=1 [test_format.py]=1 # control flow. uncompyle2 does not have problems here [test_generators.py]=1 # control flow. uncompyle2 has problem here too [test_grammar.py]=1 # Too many stmts. Handle large stmts @@ -113,6 +143,9 @@ case $PYVERSION in [test_ioctl.py]=1 # Test takes too long to run [test_itertools.py]=1 # Fix erroneous reduction to "conditional_true". # See test/simple_source/bug27+/05_not_unconditional.py + [test_long.py]=1 + [test_long_future.py]=1 + [test_math.py]=1 [test_memoryio.py]=1 # FIX [test_multiprocessing.py]=1 # On uncompyle2, taks 24 secs [test_pep352.py]=1 # ? @@ -122,9 +155,11 @@ case $PYVERSION in [test_pty.py]=1 [test_queue.py]=1 # Control flow? [test_re.py]=1 # Probably Control flow? + [test_runpy.py]=1 # Long and fails on its own [test_select.py]=1 # Runs okay but takes 11 seconds [test_socket.py]=1 # Runs ok but takes 22 seconds [test_subprocess.py]=1 # Runs ok but takes 22 seconds + [test_sys_setprofile.py]=1 [test_sys_settrace.py]=1 # Line numbers are expected to be different [test_strtod.py]=1 # FIX [test_traceback.py]=1 # Line numbers change - duh. @@ -205,10 +240,14 @@ else fi typeset -i ALL_FILES_STARTTIME=$(date +%s) +typeset -i skipped=0 for file in $files; do # AIX bash doesn't grok [[ -v SKIP... ]] - [[ ${SKIP_TESTS[$file]} == 1 ]] && continue + if [[ ${SKIP_TESTS[$file]} == 1 ]] ; then + ((skipped++)) + continue + fi # If the fails *before* decompiling, skip it! typeset -i STARTTIME=$(date +%s) @@ -242,7 +281,7 @@ for file in $files; do fi (( rc != 0 && allerrs++ )) if (( STOP_ONERROR && rc )) ; then - echo "** Ran $i tests before failure **" + echo "** Ran $i tests before failure. Skipped $skipped test for known failures. **" exit $allerrs fi done @@ -252,5 +291,5 @@ typeset -i ALL_FILES_ENDTIME=$(date +%s) printf "Ran $i unit-test files in " displaytime $time_diff - +echo "Skipped $skipped test for known failures." exit $allerrs diff --git a/uncompyle6/bin/uncompile.py b/uncompyle6/bin/uncompile.py index ac2181c1..a5525c9c 100755 --- a/uncompyle6/bin/uncompile.py +++ b/uncompyle6/bin/uncompile.py @@ -45,10 +45,12 @@ Options: --help show this message Debugging Options: - --asm | -a include byte-code (disables --verify) - --grammar | -g show matching grammar - --tree | -t include syntax tree (disables --verify) - --tree++ add template rules to --tree when possible + --asm | -a include byte-code (disables --verify) + --grammar | -g show matching grammar + --tree={before|after} + -t {before|after} include syntax before (or after) tree transformation + (disables --verify) + --tree++ | -T add template rules to --tree=before when possible Extensions of generated files: '.pyc_dis' '.pyo_dis' successfully decompiled (and verified if --verify) @@ -84,7 +86,7 @@ def main_bin(): try: opts, pyc_paths = getopt.getopt(sys.argv[1:], 'hac:gtTdrVo:p:', 'help asm compile= grammar linemaps recurse ' - 'timestamp tree tree+ ' + 'timestamp tree= tree+ ' 'fragments verify verify-run version ' 'syntax-verify ' 'showgrammar encoding='.split(' ')) @@ -115,10 +117,19 @@ def main_bin(): options['showasm'] = 'after' options['do_verify'] = None elif opt in ('--tree', '-t'): - options['showast'] = True + if 'showast' not in options: + options['showast'] = {} + if val == 'before': + options['showast'][val] = True + elif val == 'after': + options['showast'][val] = True + else: + options['showast']['before'] = True options['do_verify'] = None elif opt in ('--tree+', '-T'): - options['showast'] = 'Full' + if 'showast' not in options: + options['showast'] = {} + options['showast']['Full'] = True options['do_verify'] = None elif opt in ('--grammar', '-g'): options['showgrammar'] = True diff --git a/uncompyle6/main.py b/uncompyle6/main.py index b689758a..102d2035 100644 --- a/uncompyle6/main.py +++ b/uncompyle6/main.py @@ -41,10 +41,21 @@ def _get_outstream(outfile): return open(outfile, 'wb') def decompile( - bytecode_version, co, out=None, showasm=None, showast=False, - timestamp=None, showgrammar=False, source_encoding=None, code_objects={}, - source_size=None, is_pypy=None, magic_int=None, - mapstream=None, do_fragments=False): + bytecode_version, + co, + out=None, + showasm=None, + showast={}, + timestamp=None, + showgrammar=False, + source_encoding=None, + code_objects={}, + source_size=None, + is_pypy=None, + magic_int=None, + mapstream=None, + do_fragments=False, +): """ ingests and deparses a given code block 'co' @@ -282,7 +293,7 @@ def main(in_base, out_base, compiled_files, source_files, outfile=None, sys.stdout.write("\n") sys.stderr.write("\nLast file: %s " % (infile)) raise - except RuntimeError(e): + except RuntimeError, e: sys.stdout.write("\n%s\n" % str(e)) if str(e).startswith('Unsupported Python'): sys.stdout.write("\n") @@ -299,7 +310,7 @@ def main(in_base, out_base, compiled_files, source_files, outfile=None, # failed_files += 1 # if current_outfile: # outstream.close() - # os.rename(current_outfile, current_outfile + '_failed') + # os.rename(current_outfile, current_outfile + "_failed") # else: # sys.stderr.write("\n# %s" % sys.exc_info()[1]) # sys.stderr.write("\n# Can't uncompile %s\n" % infile) diff --git a/uncompyle6/parsers/parse27.py b/uncompyle6/parsers/parse27.py index 8b03ac15..c1598be8 100644 --- a/uncompyle6/parsers/parse27.py +++ b/uncompyle6/parsers/parse27.py @@ -234,7 +234,7 @@ class Python27Parser(Python2Parser): return invalid if rule == ('and', ('expr', 'jmp_false', 'expr', '\\e_come_from_opt')): - # If the instruction after the instructions forming "and" is a "YIELD_VALUE" + # If the instruction after the instructions formin "and" is an "YIELD_VALUE" # then this is probably an "if" inside a comprehension. if tokens[last] == 'YIELD_VALUE': # Note: We might also consider testing last+1 being "POP_TOP" @@ -243,9 +243,6 @@ class Python27Parser(Python2Parser): # Test that jmp_false jumps to the end of "and" # or that it jumps to the same place as the end of "and" jmp_false = ast[1][0] - - # FIXME: if the jmp_false is POP_JUMP_IF_FALSE is the address - # is *absoulte* and the calulation below is wrong! jmp_target = jmp_false.offset + jmp_false.attr + 3 return not (jmp_target == tokens[last].offset or tokens[last].pattr == jmp_false.pattr) diff --git a/uncompyle6/parsers/treenode.py b/uncompyle6/parsers/treenode.py index f08d42c9..f4a57559 100644 --- a/uncompyle6/parsers/treenode.py +++ b/uncompyle6/parsers/treenode.py @@ -7,6 +7,10 @@ if PYTHON3: intern = sys.intern class SyntaxTree(spark_AST): + def __init__(self, *args, **kwargs): + spark_AST.__init__(self, *args, **kwargs) + self.transformed_by = None + def isNone(self): """An SyntaxTree None token. We can't use regular list comparisons because SyntaxTree token offsets might be different""" @@ -23,6 +27,11 @@ class SyntaxTree(spark_AST): if len(self) > 1: rv += " (%d)" % (len(self)) enumerate_children = True + if self.transformed_by is not None: + if self.transformed_by is True: + rv += " (transformed)" + else: + rv += " (transformed by %s)" % self.transformed_by rv = indent + rv indent += ' ' i = 0 diff --git a/uncompyle6/semantics/pysource.py b/uncompyle6/semantics/pysource.py index 58b4ae3f..a5151741 100644 --- a/uncompyle6/semantics/pysource.py +++ b/uncompyle6/semantics/pysource.py @@ -146,8 +146,13 @@ from uncompyle6.semantics.helper import ( find_globals_and_nonlocals, flatten_list, ) + from uncompyle6.scanners.tok import Token +from uncompyle6.semantics.transform import ( + is_docstring, + TreeTransform, +) from uncompyle6.semantics.consts import ( LINE_LENGTH, RETURN_LOCALS, @@ -176,13 +181,6 @@ else: from StringIO import StringIO -def is_docstring(node): - try: - return node[0][0].kind == "assign" and node[0][0][1][0].pattr == "__doc__" - except: - return False - - class SourceWalkerError(Exception): def __init__(self, errmsg): self.errmsg = errmsg @@ -230,6 +228,7 @@ class SourceWalker(GenericASTTraversal, object): """ GenericASTTraversal.__init__(self, ast=None) + self.scanner = scanner params = {"f": out, "indent": ""} self.version = version @@ -239,6 +238,8 @@ class SourceWalker(GenericASTTraversal, object): compile_mode=compile_mode, is_pypy=is_pypy, ) + + self.treeTransform = TreeTransform(version, showast) self.debug_parser = dict(debug_parser) self.showast = showast self.params = params @@ -277,6 +278,19 @@ class SourceWalker(GenericASTTraversal, object): return + def maybe_show_tree(self, ast): + if self.showast and self.treeTransform.showast: + self.println( + """ +---- end before transform +---- begin after transform +""" + + " " + ) + + if isinstance(self.showast, dict) and self.showast.get: + maybe_show_tree(self, ast) + def str_with_template(self, ast): stream = sys.stdout stream.write(self.str_with_template1(ast, "", None)) @@ -299,6 +313,13 @@ class SourceWalker(GenericASTTraversal, object): key = key[i] pass + if ast.transformed_by is not None: + if ast.transformed_by is True: + rv += " transformed" + else: + rv += " transformed by %s" % ast.transformed_by + pass + pass if key.kind in table: rv += ": %s" % str(table[key.kind]) @@ -306,6 +327,7 @@ class SourceWalker(GenericASTTraversal, object): indent += " " i = 0 for node in ast: + if hasattr(node, "__repr1__"): if enumerate_children: child = self.str_with_template1(node, indent, i) @@ -685,89 +707,6 @@ class SourceWalker(GenericASTTraversal, object): self.println() self.prune() # stop recursing - # preprocess is used for handling chains of - # if elif elif - def n_ifelsestmt(self, node, preprocess=False): - """ - Here we turn: - - if ... - else - if .. - - into: - - if .. - elif ... - - [else ...] - - where appropriate - """ - else_suite = node[3] - - n = else_suite[0] - old_stmts = None - - if len(n) == 1 == len(n[0]) and n[0] == "stmt": - n = n[0][0] - elif n[0].kind in ("lastc_stmt", "lastl_stmt"): - n = n[0] - if n[0].kind in ( - "ifstmt", - "iflaststmt", - "iflaststmtl", - "ifelsestmtl", - "ifelsestmtc", - ): - # This seems needed for Python 2.5-2.7 - n = n[0] - pass - pass - elif len(n) > 1 and 1 == len(n[0]) and n[0] == "stmt" and n[1].kind == "stmt": - else_suite_stmts = n[0] - if else_suite_stmts[0].kind not in ("ifstmt", "iflaststmt", "ifelsestmtl"): - if not preprocess: - self.default(node) - return - old_stmts = n - n = else_suite_stmts[0] - else: - if not preprocess: - self.default(node) - return - - if n.kind in ("ifstmt", "iflaststmt", "iflaststmtl"): - node.kind = "ifelifstmt" - n.kind = "elifstmt" - elif n.kind in ("ifelsestmtr",): - node.kind = "ifelifstmt" - n.kind = "elifelsestmtr" - elif n.kind in ("ifelsestmt", "ifelsestmtc", "ifelsestmtl"): - node.kind = "ifelifstmt" - self.n_ifelsestmt(n, preprocess=True) - if n == "ifelifstmt": - n.kind = "elifelifstmt" - elif n.kind in ("ifelsestmt", "ifelsestmtc", "ifelsestmtl"): - n.kind = "elifelsestmt" - if not preprocess: - if old_stmts: - if n.kind == "elifstmt": - trailing_else = SyntaxTree("stmts", old_stmts[1:]) - # We use elifelsestmtr because it has 3 nodes - elifelse_stmt = SyntaxTree( - "elifelsestmtr", [n[0], n[1], trailing_else] - ) - node[3] = elifelse_stmt - pass - else: - # Other cases for n.kind may happen here - return - pass - self.default(node) - - n_ifelsestmtc = n_ifelsestmtl = n_ifelsestmt - def n_ifelsestmtr(self, node): if node[2] == "COME_FROM": return_stmts_node = node[3] @@ -899,17 +838,19 @@ class SourceWalker(GenericASTTraversal, object): def n_mkfunc(self, node): if self.version >= 3.3 or node[-2] in ("kwargs", "no_kwargs"): - # LOAD_CONST code object .. - # LOAD_CONST 'x0' if >= 3.3 + # LOAD_CODET code object .. + # LOAD_CONST "x0" if >= 3.3 # MAKE_FUNCTION .. code_node = node[-3] elif node[-2] == "expr": code_node = node[-2][0] else: - # LOAD_CONST code object .. + # LOAD_CODE code object .. # MAKE_FUNCTION .. code_node = node[-2] + assert iscode(code_node.attr) + func_name = code_node.attr.co_name self.write(func_name) @@ -930,6 +871,75 @@ class SourceWalker(GenericASTTraversal, object): else: make_function2(self, node, is_lambda, nested, code_node) + def n_docstring(self, node): + + indent = self.indent + docstring = node[0].pattr + + quote = '"""' + if docstring.find(quote) >= 0: + if docstring.find("'''") == -1: + quote = "'''" + + self.write(indent) + docstring = repr(docstring.expandtabs())[1:-1] + + for (orig, replace) in (('\\\\', '\t'), + ('\\r\\n', '\n'), + ('\\n', '\n'), + ('\\r', '\n'), + ('\\"', '"'), + ("\\'", "'")): + docstring = docstring.replace(orig, replace) + + # Do a raw string if there are backslashes but no other escaped characters: + # also check some edge cases + if ('\t' in docstring + and '\\' not in docstring + and len(docstring) >= 2 + and docstring[-1] != '\t' + and (docstring[-1] != '"' + or docstring[-2] == '\t')): + self.write('r') # raw string + # Restore backslashes unescaped since raw + docstring = docstring.replace('\t', '\\') + else: + # Escape the last character if it is the same as the + # triple quote character. + quote1 = quote[-1] + if len(docstring) and docstring[-1] == quote1: + docstring = docstring[:-1] + '\\' + quote1 + + # Escape triple quote when needed + if quote == '"""': + replace_str = '\\"""' + else: + assert quote == "'''" + replace_str = "\\'''" + + docstring = docstring.replace(quote, replace_str) + docstring = docstring.replace('\t', '\\\\') + + lines = docstring.split('\n') + + self.write(quote) + if len(lines) == 0: + self.println(quote) + elif len(lines) == 1: + self.println(lines[0], quote) + else: + self.println(lines[0]) + for line in lines[1:-1]: + if line: + self.println( line ) + else: + self.println( "\n\n" ) + pass + pass + self.println(lines[-1], quote) + self.prune() + + def n_mklambda(self, node): self.make_function(node, is_lambda=True, code_node=node[-2]) self.prune() # stop recursing @@ -1816,6 +1826,19 @@ class SourceWalker(GenericASTTraversal, object): lastnode = node.pop() lastnodetype = lastnode.kind + # If this build list is inside a CALL_FUNCTION_VAR, + # then the first * has already been printed. + # Until I have a better way to check for CALL_FUNCTION_VAR, + # will assume that if the text ends in *. + last_was_star = self.f.getvalue().endswith("*") + + if lastnodetype.endswith("UNPACK"): + # FIXME: need to handle range of BUILD_LIST_UNPACK + have_star = True + # endchar = '' + else: + have_star = False + if lastnodetype.startswith("BUILD_LIST"): self.write("[") endchar = "]" @@ -1866,6 +1889,13 @@ class SourceWalker(GenericASTTraversal, object): else: if sep != "": sep += " " + if not last_was_star: + if have_star: + sep += "*" + pass + pass + else: + last_was_star = False self.write(sep, value) sep = "," if lastnode.attr == 1 and lastnodetype.startswith("BUILD_TUPLE"): @@ -2219,6 +2249,10 @@ class SourceWalker(GenericASTTraversal, object): code._tokens = None # save memory assert ast == "stmts" + if ast[0] == "docstring": + self.println(self.traverse(ast[0])) + del ast[0] + first_stmt = ast[0][0] if 3.0 <= self.version <= 3.3: try: @@ -2366,8 +2400,10 @@ class SourceWalker(GenericASTTraversal, object): raise ParserError(e, tokens) except AssertionError, e: raise ParserError(e, tokens) - maybe_show_tree(self, ast) - return ast + transform_ast = self.treeTransform.transform(ast) + self.maybe_show_tree(ast) + del ast # Save memory + return transform_ast # The bytecode for the end of the main routine has a # "return None". However you can't issue a "return" statement in @@ -2399,11 +2435,15 @@ class SourceWalker(GenericASTTraversal, object): except python_parser.ParserError, e: raise ParserError(e, tokens) - maybe_show_tree(self, ast) - checker(ast, False, self.ast_errors) - return ast + self.customize(customize) + transform_ast = self.treeTransform.transform(ast) + + self.maybe_show_tree(ast) + + del ast # Save memory + return transform_ast @classmethod def _get_mapping(cls, node): diff --git a/uncompyle6/semantics/transform.py b/uncompyle6/semantics/transform.py new file mode 100644 index 00000000..6d9e6d86 --- /dev/null +++ b/uncompyle6/semantics/transform.py @@ -0,0 +1,239 @@ +# Copyright (c) 2019 by Rocky Bernstein + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +from uncompyle6.show import maybe_show_tree +from copy import copy +from spark_parser import GenericASTTraversal, GenericASTTraversalPruningException + +from uncompyle6.parsers.treenode import SyntaxTree +from uncompyle6.scanners.tok import Token +from uncompyle6.semantics.consts import RETURN_NONE + + +def is_docstring(node): + try: + return node[0][0].kind == "assign" and node[0][0][1][0].pattr == "__doc__" + except: + return False + + +class TreeTransform(GenericASTTraversal, object): + def __init__(self, version, show_ast=None): + self.version = version + self.showast = show_ast + return + + def maybe_show_tree(self, ast): + if isinstance(self.showast, dict) and self.showast: + maybe_show_tree(self, ast) + + def preorder(self, node=None): + """Walk the tree in roughly 'preorder' (a bit of a lie explained below). + For each node with typestring name *name* if the + node has a method called n_*name*, call that before walking + children. + + In typical use a node with children can call "preorder" in any + order it wants which may skip children or order then in ways + other than first to last. In fact, this this happens. So in + this sense this function not strictly preorder. + """ + if node is None: + node = self.ast + + try: + name = "n_" + self.typestring(node) + if hasattr(self, name): + func = getattr(self, name) + node = func(node) + except GenericASTTraversalPruningException: + return + + for i, kid in enumerate(node): + node[i] = self.preorder(kid) + return node + + def n_ifstmt(self, node): + """Here we check if we can turn an `ifstmt` or 'iflaststmtl` into + some kind of `assert` statement""" + + testexpr = node[0] + + if testexpr.kind != "testexpr": + return node + if node.kind == "ifstmt": + ifstmts_jump = node[1] + if node[1] != "_ifstmts_jump": + return node + stmts = ifstmts_jump[0] + else: + # iflaststmtl works this way + stmts = node[1] + + if stmts in ("c_stmts",) and len(stmts) == 1: + stmt = stmts[0] + raise_stmt = stmt[0] + if raise_stmt == "raise_stmt1" and len(testexpr[0]) == 2: + assert_expr = testexpr[0][0] + assert_expr.kind = "assert_expr" + jmp_true = testexpr[0][1] + expr = raise_stmt[0] + RAISE_VARARGS_1 = raise_stmt[1] + if expr[0] == "call": + # ifstmt + # 0. testexpr + # testtrue (2) + # 0. expr + # 1. _ifstmts_jump (2) + # 0. c_stmts + # stmt + # raise_stmt1 (2) + # 0. expr + # call (3) + # 1. RAISE_VARARGS_1 + # becomes: + # assert2 ::= assert_expr jmp_true LOAD_ASSERT expr RAISE_VARARGS_1 COME_FROM + call = expr[0] + LOAD_ASSERT = call[0] + expr = call[1][0] + node = SyntaxTree( + "assert2", + [assert_expr, jmp_true, LOAD_ASSERT, expr, RAISE_VARARGS_1] + ) + node.transformed_by="n_ifstmt", + + else: + # ifstmt + # 0. testexpr (2) + # testtrue + # 0. expr + # 1. _ifstmts_jump (2) + # 0. c_stmts + # stmts + # raise_stmt1 (2) + # 0. expr + # LOAD_ASSERT + # 1. RAISE_VARARGS_1 + # becomes: + # assert ::= assert_expr jmp_true LOAD_ASSERT RAISE_VARARGS_1 COME_FROM + LOAD_ASSERT = expr[0] + node = SyntaxTree( + "assert", + [assert_expr, jmp_true, LOAD_ASSERT, RAISE_VARARGS_1] + ) + node.transformed_by="n_ifstmt", + pass + pass + return node + + n_iflaststmtl = n_ifstmt + + # preprocess is used for handling chains of + # if elif elif + def n_ifelsestmt(self, node, preprocess=False): + """ + Here we turn: + + if ... + else + if .. + + into: + + if .. + elif ... + + [else ...] + + where appropriate + """ + else_suite = node[3] + + n = else_suite[0] + old_stmts = None + + if len(n) == 1 == len(n[0]) and n[0] == "stmt": + n = n[0][0] + elif n[0].kind in ("lastc_stmt", "lastl_stmt"): + n = n[0] + if n[0].kind in ( + "ifstmt", + "iflaststmt", + "iflaststmtl", + "ifelsestmtl", + "ifelsestmtc", + ): + # This seems needed for Python 2.5-2.7 + n = n[0] + pass + pass + elif len(n) > 1 and 1 == len(n[0]) and n[0] == "stmt" and n[1].kind == "stmt": + else_suite_stmts = n[0] + if else_suite_stmts[0].kind not in ("ifstmt", "iflaststmt", "ifelsestmtl"): + return node + old_stmts = n + n = else_suite_stmts[0] + else: + return node + + if n.kind in ("ifstmt", "iflaststmt", "iflaststmtl"): + node.kind = "ifelifstmt" + n.kind = "elifstmt" + elif n.kind in ("ifelsestmtr",): + node.kind = "ifelifstmt" + n.kind = "elifelsestmtr" + elif n.kind in ("ifelsestmt", "ifelsestmtc", "ifelsestmtl"): + node.kind = "ifelifstmt" + self.n_ifelsestmt(n, preprocess=True) + if n == "ifelifstmt": + n.kind = "elifelifstmt" + elif n.kind in ("ifelsestmt", "ifelsestmtc", "ifelsestmtl"): + n.kind = "elifelsestmt" + if not preprocess: + if old_stmts: + if n.kind == "elifstmt": + trailing_else = SyntaxTree("stmts", old_stmts[1:]) + # We use elifelsestmtr because it has 3 nodes + elifelse_stmt = SyntaxTree( + "elifelsestmtr", [n[0], n[1], trailing_else] + ) + node[3] = elifelse_stmt + pass + else: + # Other cases for n.kind may happen here + pass + pass + node.transformed_by = "n_ifelsestmt" + return node + + n_ifelsestmtc = n_ifelsestmtl = n_ifelsestmt + + def traverse(self, node, is_lambda=False): + node = self.preorder(node) + return node + + def transform(self, ast): + self.maybe_show_tree(ast) + self.ast = copy(ast) + self.ast = self.traverse(self.ast, is_lambda=False) + + if self.ast[-1] == RETURN_NONE: + self.ast.pop() # remove last node + # todo: if empty, add 'pass' + + return self.ast + + # Write template_engine + # def template_engine diff --git a/uncompyle6/show.py b/uncompyle6/show.py index 2cb927c3..0ca9d902 100644 --- a/uncompyle6/show.py +++ b/uncompyle6/show.py @@ -32,7 +32,7 @@ def maybe_show_asm(showasm, tokens): stream = sys.stdout for t in tokens: stream.write(str(t)) - stream.write('\n') + stream.write("\n") def maybe_show_tree(walker, ast): @@ -46,15 +46,16 @@ def maybe_show_tree(walker, ast): :param ast: The ast to show. """ if walker.showast: - if hasattr(walker.showast, 'write'): + if hasattr(walker.showast, "write"): stream = walker.showast else: stream = sys.stdout - if walker.showast == 'Full': + if (isinstance(walker.showast, dict) and walker.showast.get("Full", False) + and hasattr(walker, "str_with_template")): walker.str_with_template(ast) else: stream.write(str(ast)) - stream.write('\n') + stream.write("\n") def maybe_show_tree_param_default(show_tree, name, default): @@ -79,6 +80,6 @@ def maybe_show_tree_param_default(show_tree, name, default): stream.write('--' + name) stream.write('\n') stream.write(str(default)) - stream.write('\n') - stream.write('--') - stream.write('\n') + stream.write("\n") + stream.write("--") + stream.write("\n") diff --git a/uncompyle6/version.py b/uncompyle6/version.py index 2cb6e9f8..5e196262 100644 --- a/uncompyle6/version.py +++ b/uncompyle6/version.py @@ -12,4 +12,4 @@ # along with this program. If not, see . # This file is suitable for sourcing inside bash as # well as importing into Python -VERSION="3.3.5" # noqa +VERSION="3.4.0" # noqa