diff --git a/uncompyle6/bin/uncompile.py b/uncompyle6/bin/uncompile.py index 037ef177..fc121b84 100755 --- a/uncompyle6/bin/uncompile.py +++ b/uncompyle6/bin/uncompile.py @@ -46,10 +46,12 @@ Options: --help show this message Debugging Options: - --asm | -a include byte-code (disables --verify) - --grammar | -g show matching grammar - --tree | -t include syntax tree (disables --verify) - --tree++ add template rules to --tree when possible + --asm | -a include byte-code (disables --verify) + --grammar | -g show matching grammar + --tree={before|after} + -t {before|after} include syntax before (or after) tree transformation + (disables --verify) + --tree++ | -T add template rules to --tree=before when possible Extensions of generated files: '.pyc_dis' '.pyo_dis' successfully decompiled (and verified if --verify) @@ -89,7 +91,7 @@ def main_bin(): try: opts, pyc_paths = getopt.getopt(sys.argv[1:], 'hac:gtTdrVo:p:', 'help asm compile= grammar linemaps recurse ' - 'timestamp tree tree+ ' + 'timestamp tree= tree+ ' 'fragments verify verify-run version ' 'syntax-verify ' 'showgrammar encoding='.split(' ')) @@ -119,10 +121,19 @@ def main_bin(): options['showasm'] = 'after' options['do_verify'] = None elif opt in ('--tree', '-t'): - options['showast'] = True + if 'showast' not in options: + options['showast'] = {} + if val == 'before': + options['showast'][val] = True + elif val == 'after': + options['showast'][val] = True + else: + options['showast']['before'] = True options['do_verify'] = None elif opt in ('--tree+', '-T'): - options['showast'] = 'Full' + if 'showast' not in options: + options['showast'] = {} + options['showast']['Full'] = True options['do_verify'] = None elif opt in ('--grammar', '-g'): options['showgrammar'] = True diff --git a/uncompyle6/main.py b/uncompyle6/main.py index 3609cae4..b053a28c 100644 --- a/uncompyle6/main.py +++ b/uncompyle6/main.py @@ -45,10 +45,21 @@ def _get_outstream(outfile): return open(outfile, mode='w', encoding='utf-8') def decompile( - bytecode_version, co, out=None, showasm=None, showast=False, - timestamp=None, showgrammar=False, source_encoding=None, code_objects={}, - source_size=None, is_pypy=None, magic_int=None, - mapstream=None, do_fragments=False): + bytecode_version, + co, + out=None, + showasm=None, + showast={}, + timestamp=None, + showgrammar=False, + source_encoding=None, + code_objects={}, + source_size=None, + is_pypy=None, + magic_int=None, + mapstream=None, + do_fragments=False, +): """ ingests and deparses a given code block 'co' @@ -294,7 +305,7 @@ def main(in_base, out_base, compiled_files, source_files, outfile=None, # failed_files += 1 # if current_outfile: # outstream.close() - # os.rename(current_outfile, current_outfile + '_failed') + # os.rename(current_outfile, current_outfile + "_failed") # else: # sys.stderr.write("\n# %s" % sys.exc_info()[1]) # sys.stderr.write("\n# Can't uncompile %s\n" % infile) diff --git a/uncompyle6/parsers/treenode.py b/uncompyle6/parsers/treenode.py index f08d42c9..9f468072 100644 --- a/uncompyle6/parsers/treenode.py +++ b/uncompyle6/parsers/treenode.py @@ -7,6 +7,10 @@ if PYTHON3: intern = sys.intern class SyntaxTree(spark_AST): + def __init__(self, *args, **kwargs): + super(SyntaxTree, self).__init__(*args, **kwargs) + self.transformed_by = None + def isNone(self): """An SyntaxTree None token. We can't use regular list comparisons because SyntaxTree token offsets might be different""" @@ -23,6 +27,11 @@ class SyntaxTree(spark_AST): if len(self) > 1: rv += " (%d)" % (len(self)) enumerate_children = True + if self.transformed_by is not None: + if self.transformed_by is True: + rv += " (transformed)" + else: + rv += " (transformed by %s)" % self.transformed_by rv = indent + rv indent += ' ' i = 0 diff --git a/uncompyle6/semantics/pysource.py b/uncompyle6/semantics/pysource.py index 339b1943..f32ad4e6 100644 --- a/uncompyle6/semantics/pysource.py +++ b/uncompyle6/semantics/pysource.py @@ -146,8 +146,13 @@ from uncompyle6.semantics.helper import ( find_globals_and_nonlocals, flatten_list, ) + from uncompyle6.scanners.tok import Token +from uncompyle6.semantics.transform import ( + is_docstring, + TreeTransform, +) from uncompyle6.semantics.consts import ( LINE_LENGTH, RETURN_LOCALS, @@ -176,13 +181,6 @@ else: from StringIO import StringIO -def is_docstring(node): - try: - return node[0][0].kind == "assign" and node[0][0][1][0].pattr == "__doc__" - except: - return False - - class SourceWalkerError(Exception): def __init__(self, errmsg): self.errmsg = errmsg @@ -230,6 +228,7 @@ class SourceWalker(GenericASTTraversal, object): """ GenericASTTraversal.__init__(self, ast=None) + self.scanner = scanner params = {"f": out, "indent": ""} self.version = version @@ -239,6 +238,8 @@ class SourceWalker(GenericASTTraversal, object): compile_mode=compile_mode, is_pypy=is_pypy, ) + + self.treeTransform = TreeTransform(version, showast) self.debug_parser = dict(debug_parser) self.showast = showast self.params = params @@ -277,6 +278,19 @@ class SourceWalker(GenericASTTraversal, object): return + def maybe_show_tree(self, ast): + if self.showast and self.treeTransform.showast: + self.println( + """ +---- end before transform +---- begin after transform +""" + + " " + ) + + if isinstance(self.showast, dict) and self.showast.get: + maybe_show_tree(self, ast) + def str_with_template(self, ast): stream = sys.stdout stream.write(self.str_with_template1(ast, "", None)) @@ -299,6 +313,13 @@ class SourceWalker(GenericASTTraversal, object): key = key[i] pass + if ast.transformed_by is not None: + if ast.transformed_by is True: + rv += " transformed" + else: + rv += " transformed by %s" % ast.transformed_by + pass + pass if key.kind in table: rv += ": %s" % str(table[key.kind]) @@ -306,6 +327,7 @@ class SourceWalker(GenericASTTraversal, object): indent += " " i = 0 for node in ast: + if hasattr(node, "__repr1__"): if enumerate_children: child = self.str_with_template1(node, indent, i) @@ -685,89 +707,6 @@ class SourceWalker(GenericASTTraversal, object): self.println() self.prune() # stop recursing - # preprocess is used for handling chains of - # if elif elif - def n_ifelsestmt(self, node, preprocess=False): - """ - Here we turn: - - if ... - else - if .. - - into: - - if .. - elif ... - - [else ...] - - where appropriate - """ - else_suite = node[3] - - n = else_suite[0] - old_stmts = None - - if len(n) == 1 == len(n[0]) and n[0] == "stmt": - n = n[0][0] - elif n[0].kind in ("lastc_stmt", "lastl_stmt"): - n = n[0] - if n[0].kind in ( - "ifstmt", - "iflaststmt", - "iflaststmtl", - "ifelsestmtl", - "ifelsestmtc", - ): - # This seems needed for Python 2.5-2.7 - n = n[0] - pass - pass - elif len(n) > 1 and 1 == len(n[0]) and n[0] == "stmt" and n[1].kind == "stmt": - else_suite_stmts = n[0] - if else_suite_stmts[0].kind not in ("ifstmt", "iflaststmt", "ifelsestmtl"): - if not preprocess: - self.default(node) - return - old_stmts = n - n = else_suite_stmts[0] - else: - if not preprocess: - self.default(node) - return - - if n.kind in ("ifstmt", "iflaststmt", "iflaststmtl"): - node.kind = "ifelifstmt" - n.kind = "elifstmt" - elif n.kind in ("ifelsestmtr",): - node.kind = "ifelifstmt" - n.kind = "elifelsestmtr" - elif n.kind in ("ifelsestmt", "ifelsestmtc", "ifelsestmtl"): - node.kind = "ifelifstmt" - self.n_ifelsestmt(n, preprocess=True) - if n == "ifelifstmt": - n.kind = "elifelifstmt" - elif n.kind in ("ifelsestmt", "ifelsestmtc", "ifelsestmtl"): - n.kind = "elifelsestmt" - if not preprocess: - if old_stmts: - if n.kind == "elifstmt": - trailing_else = SyntaxTree("stmts", old_stmts[1:]) - # We use elifelsestmtr because it has 3 nodes - elifelse_stmt = SyntaxTree( - "elifelsestmtr", [n[0], n[1], trailing_else] - ) - node[3] = elifelse_stmt - pass - else: - # Other cases for n.kind may happen here - return - pass - self.default(node) - - n_ifelsestmtc = n_ifelsestmtl = n_ifelsestmt - def n_ifelsestmtr(self, node): if node[2] == "COME_FROM": return_stmts_node = node[3] @@ -899,17 +838,19 @@ class SourceWalker(GenericASTTraversal, object): def n_mkfunc(self, node): if self.version >= 3.3 or node[-2] in ("kwargs", "no_kwargs"): - # LOAD_CONST code object .. - # LOAD_CONST 'x0' if >= 3.3 + # LOAD_CODET code object .. + # LOAD_CONST "x0" if >= 3.3 # MAKE_FUNCTION .. code_node = node[-3] elif node[-2] == "expr": code_node = node[-2][0] else: - # LOAD_CONST code object .. + # LOAD_CODE code object .. # MAKE_FUNCTION .. code_node = node[-2] + assert iscode(code_node.attr) + func_name = code_node.attr.co_name self.write(func_name) @@ -930,6 +871,75 @@ class SourceWalker(GenericASTTraversal, object): else: make_function2(self, node, is_lambda, nested, code_node) + def n_docstring(self, node): + + indent = self.indent + docstring = node[0].pattr + + quote = '"""' + if docstring.find(quote) >= 0: + if docstring.find("'''") == -1: + quote = "'''" + + self.write(indent) + docstring = repr(docstring.expandtabs())[1:-1] + + for (orig, replace) in (('\\\\', '\t'), + ('\\r\\n', '\n'), + ('\\n', '\n'), + ('\\r', '\n'), + ('\\"', '"'), + ("\\'", "'")): + docstring = docstring.replace(orig, replace) + + # Do a raw string if there are backslashes but no other escaped characters: + # also check some edge cases + if ('\t' in docstring + and '\\' not in docstring + and len(docstring) >= 2 + and docstring[-1] != '\t' + and (docstring[-1] != '"' + or docstring[-2] == '\t')): + self.write('r') # raw string + # Restore backslashes unescaped since raw + docstring = docstring.replace('\t', '\\') + else: + # Escape the last character if it is the same as the + # triple quote character. + quote1 = quote[-1] + if len(docstring) and docstring[-1] == quote1: + docstring = docstring[:-1] + '\\' + quote1 + + # Escape triple quote when needed + if quote == '"""': + replace_str = '\\"""' + else: + assert quote == "'''" + replace_str = "\\'''" + + docstring = docstring.replace(quote, replace_str) + docstring = docstring.replace('\t', '\\\\') + + lines = docstring.split('\n') + + self.write(quote) + if len(lines) == 0: + self.println(quote) + elif len(lines) == 1: + self.println(lines[0], quote) + else: + self.println(lines[0]) + for line in lines[1:-1]: + if line: + self.println( line ) + else: + self.println( "\n\n" ) + pass + pass + self.println(lines[-1], quote) + self.prune() + + def n_mklambda(self, node): self.make_function(node, is_lambda=True, code_node=node[-2]) self.prune() # stop recursing @@ -1816,6 +1826,19 @@ class SourceWalker(GenericASTTraversal, object): lastnode = node.pop() lastnodetype = lastnode.kind + # If this build list is inside a CALL_FUNCTION_VAR, + # then the first * has already been printed. + # Until I have a better way to check for CALL_FUNCTION_VAR, + # will assume that if the text ends in *. + last_was_star = self.f.getvalue().endswith("*") + + if lastnodetype.endswith("UNPACK"): + # FIXME: need to handle range of BUILD_LIST_UNPACK + have_star = True + # endchar = '' + else: + have_star = False + if lastnodetype.startswith("BUILD_LIST"): self.write("[") endchar = "]" @@ -1866,6 +1889,13 @@ class SourceWalker(GenericASTTraversal, object): else: if sep != "": sep += " " + if not last_was_star: + if have_star: + sep += "*" + pass + pass + else: + last_was_star = False self.write(sep, value) sep = "," if lastnode.attr == 1 and lastnodetype.startswith("BUILD_TUPLE"): @@ -2219,6 +2249,10 @@ class SourceWalker(GenericASTTraversal, object): code._tokens = None # save memory assert ast == "stmts" + if ast[0] == "docstring": + self.println(self.traverse(ast[0])) + del ast[0] + first_stmt = ast[0][0] if 3.0 <= self.version <= 3.3: try: @@ -2364,8 +2398,10 @@ class SourceWalker(GenericASTTraversal, object): self.p.insts = p_insts except (python_parser.ParserError, AssertionError) as e: raise ParserError(e, tokens) - maybe_show_tree(self, ast) - return ast + transform_ast = self.treeTransform.transform(ast) + self.maybe_show_tree(ast) + del ast # Save memory + return transform_ast # The bytecode for the end of the main routine has a # "return None". However you can't issue a "return" statement in @@ -2397,11 +2433,15 @@ class SourceWalker(GenericASTTraversal, object): except (python_parser.ParserError, AssertionError) as e: raise ParserError(e, tokens) - maybe_show_tree(self, ast) - checker(ast, False, self.ast_errors) - return ast + self.customize(customize) + transform_ast = self.treeTransform.transform(ast) + + self.maybe_show_tree(ast) + + del ast # Save memory + return transform_ast @classmethod def _get_mapping(cls, node): diff --git a/uncompyle6/semantics/transform.py b/uncompyle6/semantics/transform.py new file mode 100644 index 00000000..6d9e6d86 --- /dev/null +++ b/uncompyle6/semantics/transform.py @@ -0,0 +1,239 @@ +# Copyright (c) 2019 by Rocky Bernstein + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +from uncompyle6.show import maybe_show_tree +from copy import copy +from spark_parser import GenericASTTraversal, GenericASTTraversalPruningException + +from uncompyle6.parsers.treenode import SyntaxTree +from uncompyle6.scanners.tok import Token +from uncompyle6.semantics.consts import RETURN_NONE + + +def is_docstring(node): + try: + return node[0][0].kind == "assign" and node[0][0][1][0].pattr == "__doc__" + except: + return False + + +class TreeTransform(GenericASTTraversal, object): + def __init__(self, version, show_ast=None): + self.version = version + self.showast = show_ast + return + + def maybe_show_tree(self, ast): + if isinstance(self.showast, dict) and self.showast: + maybe_show_tree(self, ast) + + def preorder(self, node=None): + """Walk the tree in roughly 'preorder' (a bit of a lie explained below). + For each node with typestring name *name* if the + node has a method called n_*name*, call that before walking + children. + + In typical use a node with children can call "preorder" in any + order it wants which may skip children or order then in ways + other than first to last. In fact, this this happens. So in + this sense this function not strictly preorder. + """ + if node is None: + node = self.ast + + try: + name = "n_" + self.typestring(node) + if hasattr(self, name): + func = getattr(self, name) + node = func(node) + except GenericASTTraversalPruningException: + return + + for i, kid in enumerate(node): + node[i] = self.preorder(kid) + return node + + def n_ifstmt(self, node): + """Here we check if we can turn an `ifstmt` or 'iflaststmtl` into + some kind of `assert` statement""" + + testexpr = node[0] + + if testexpr.kind != "testexpr": + return node + if node.kind == "ifstmt": + ifstmts_jump = node[1] + if node[1] != "_ifstmts_jump": + return node + stmts = ifstmts_jump[0] + else: + # iflaststmtl works this way + stmts = node[1] + + if stmts in ("c_stmts",) and len(stmts) == 1: + stmt = stmts[0] + raise_stmt = stmt[0] + if raise_stmt == "raise_stmt1" and len(testexpr[0]) == 2: + assert_expr = testexpr[0][0] + assert_expr.kind = "assert_expr" + jmp_true = testexpr[0][1] + expr = raise_stmt[0] + RAISE_VARARGS_1 = raise_stmt[1] + if expr[0] == "call": + # ifstmt + # 0. testexpr + # testtrue (2) + # 0. expr + # 1. _ifstmts_jump (2) + # 0. c_stmts + # stmt + # raise_stmt1 (2) + # 0. expr + # call (3) + # 1. RAISE_VARARGS_1 + # becomes: + # assert2 ::= assert_expr jmp_true LOAD_ASSERT expr RAISE_VARARGS_1 COME_FROM + call = expr[0] + LOAD_ASSERT = call[0] + expr = call[1][0] + node = SyntaxTree( + "assert2", + [assert_expr, jmp_true, LOAD_ASSERT, expr, RAISE_VARARGS_1] + ) + node.transformed_by="n_ifstmt", + + else: + # ifstmt + # 0. testexpr (2) + # testtrue + # 0. expr + # 1. _ifstmts_jump (2) + # 0. c_stmts + # stmts + # raise_stmt1 (2) + # 0. expr + # LOAD_ASSERT + # 1. RAISE_VARARGS_1 + # becomes: + # assert ::= assert_expr jmp_true LOAD_ASSERT RAISE_VARARGS_1 COME_FROM + LOAD_ASSERT = expr[0] + node = SyntaxTree( + "assert", + [assert_expr, jmp_true, LOAD_ASSERT, RAISE_VARARGS_1] + ) + node.transformed_by="n_ifstmt", + pass + pass + return node + + n_iflaststmtl = n_ifstmt + + # preprocess is used for handling chains of + # if elif elif + def n_ifelsestmt(self, node, preprocess=False): + """ + Here we turn: + + if ... + else + if .. + + into: + + if .. + elif ... + + [else ...] + + where appropriate + """ + else_suite = node[3] + + n = else_suite[0] + old_stmts = None + + if len(n) == 1 == len(n[0]) and n[0] == "stmt": + n = n[0][0] + elif n[0].kind in ("lastc_stmt", "lastl_stmt"): + n = n[0] + if n[0].kind in ( + "ifstmt", + "iflaststmt", + "iflaststmtl", + "ifelsestmtl", + "ifelsestmtc", + ): + # This seems needed for Python 2.5-2.7 + n = n[0] + pass + pass + elif len(n) > 1 and 1 == len(n[0]) and n[0] == "stmt" and n[1].kind == "stmt": + else_suite_stmts = n[0] + if else_suite_stmts[0].kind not in ("ifstmt", "iflaststmt", "ifelsestmtl"): + return node + old_stmts = n + n = else_suite_stmts[0] + else: + return node + + if n.kind in ("ifstmt", "iflaststmt", "iflaststmtl"): + node.kind = "ifelifstmt" + n.kind = "elifstmt" + elif n.kind in ("ifelsestmtr",): + node.kind = "ifelifstmt" + n.kind = "elifelsestmtr" + elif n.kind in ("ifelsestmt", "ifelsestmtc", "ifelsestmtl"): + node.kind = "ifelifstmt" + self.n_ifelsestmt(n, preprocess=True) + if n == "ifelifstmt": + n.kind = "elifelifstmt" + elif n.kind in ("ifelsestmt", "ifelsestmtc", "ifelsestmtl"): + n.kind = "elifelsestmt" + if not preprocess: + if old_stmts: + if n.kind == "elifstmt": + trailing_else = SyntaxTree("stmts", old_stmts[1:]) + # We use elifelsestmtr because it has 3 nodes + elifelse_stmt = SyntaxTree( + "elifelsestmtr", [n[0], n[1], trailing_else] + ) + node[3] = elifelse_stmt + pass + else: + # Other cases for n.kind may happen here + pass + pass + node.transformed_by = "n_ifelsestmt" + return node + + n_ifelsestmtc = n_ifelsestmtl = n_ifelsestmt + + def traverse(self, node, is_lambda=False): + node = self.preorder(node) + return node + + def transform(self, ast): + self.maybe_show_tree(ast) + self.ast = copy(ast) + self.ast = self.traverse(self.ast, is_lambda=False) + + if self.ast[-1] == RETURN_NONE: + self.ast.pop() # remove last node + # todo: if empty, add 'pass' + + return self.ast + + # Write template_engine + # def template_engine diff --git a/uncompyle6/show.py b/uncompyle6/show.py index 308c81a2..13aca733 100644 --- a/uncompyle6/show.py +++ b/uncompyle6/show.py @@ -26,10 +26,10 @@ def maybe_show_asm(showasm, tokens): :param tokens: The asm tokens to show. """ if showasm: - stream = showasm if hasattr(showasm, 'write') else sys.stdout + stream = showasm if hasattr(showasm, "write") else sys.stdout for t in tokens: stream.write(str(t)) - stream.write('\n') + stream.write("\n") def maybe_show_tree(walker, ast): @@ -43,15 +43,16 @@ def maybe_show_tree(walker, ast): :param ast: The ast to show. """ if walker.showast: - if hasattr(walker.showast, 'write'): + if hasattr(walker.showast, "write"): stream = walker.showast else: stream = sys.stdout - if walker.showast == 'Full': + if (isinstance(walker.showast, dict) and walker.showast.get("Full", False) + and hasattr(walker, "str_with_template")): walker.str_with_template(ast) else: stream.write(str(ast)) - stream.write('\n') + stream.write("\n") def maybe_show_tree_param_default(show_tree, name, default): @@ -68,11 +69,11 @@ def maybe_show_tree_param_default(show_tree, name, default): :param default: The function parameter default. """ if show_tree: - stream = show_tree if hasattr(show_tree, 'write') else sys.stdout - stream.write('\n') - stream.write('--' + name) - stream.write('\n') + stream = show_tree if hasattr(show_tree, "write") else sys.stdout + stream.write("\n") + stream.write("--" + name) + stream.write("\n") stream.write(str(default)) - stream.write('\n') - stream.write('--') - stream.write('\n') + stream.write("\n") + stream.write("--") + stream.write("\n")