Add tree transformation phase...

if ... else if ... ->  if ... elif ..
if .. : raise AssertionError ->  assert

Add options --tree=before --tree=after  -T expanded to include this

This code ported from decompyle3. x0ret did all the heavy lifting.
This commit is contained in:
rocky
2019-08-21 08:11:42 -04:00
parent 73619de3f5
commit 1ebfde6927
6 changed files with 433 additions and 122 deletions

View File

@@ -46,10 +46,12 @@ Options:
--help show this message
Debugging Options:
--asm | -a include byte-code (disables --verify)
--grammar | -g show matching grammar
--tree | -t include syntax tree (disables --verify)
--tree++ add template rules to --tree when possible
--asm | -a include byte-code (disables --verify)
--grammar | -g show matching grammar
--tree={before|after}
-t {before|after} include syntax before (or after) tree transformation
(disables --verify)
--tree+ | -T add template rules to --tree=before when possible
Extensions of generated files:
'.pyc_dis' '.pyo_dis' successfully decompiled (and verified if --verify)
@@ -89,7 +91,7 @@ def main_bin():
try:
opts, pyc_paths = getopt.getopt(sys.argv[1:], 'hac:gtTdrVo:p:',
'help asm compile= grammar linemaps recurse '
'timestamp tree tree+ '
'timestamp tree= tree+ '
'fragments verify verify-run version '
'syntax-verify '
'showgrammar encoding='.split(' '))
@@ -119,10 +121,19 @@ def main_bin():
options['showasm'] = 'after'
options['do_verify'] = None
elif opt in ('--tree', '-t'):
options['showast'] = True
if 'showast' not in options:
options['showast'] = {}
if val == 'before':
options['showast'][val] = True
elif val == 'after':
options['showast'][val] = True
else:
options['showast']['before'] = True
options['do_verify'] = None
elif opt in ('--tree+', '-T'):
options['showast'] = 'Full'
if 'showast' not in options:
options['showast'] = {}
options['showast']['Full'] = True
options['do_verify'] = None
elif opt in ('--grammar', '-g'):
options['showgrammar'] = True

View File

@@ -45,10 +45,21 @@ def _get_outstream(outfile):
return open(outfile, mode='w', encoding='utf-8')
def decompile(
bytecode_version, co, out=None, showasm=None, showast=False,
timestamp=None, showgrammar=False, source_encoding=None, code_objects={},
source_size=None, is_pypy=None, magic_int=None,
mapstream=None, do_fragments=False):
bytecode_version,
co,
out=None,
showasm=None,
showast={},
timestamp=None,
showgrammar=False,
source_encoding=None,
code_objects={},
source_size=None,
is_pypy=None,
magic_int=None,
mapstream=None,
do_fragments=False,
):
"""
ingests and deparses a given code block 'co'
@@ -294,7 +305,7 @@ def main(in_base, out_base, compiled_files, source_files, outfile=None,
# failed_files += 1
# if current_outfile:
# outstream.close()
# os.rename(current_outfile, current_outfile + '_failed')
# os.rename(current_outfile, current_outfile + "_failed")
# else:
# sys.stderr.write("\n# %s" % sys.exc_info()[1])
# sys.stderr.write("\n# Can't uncompile %s\n" % infile)

View File

@@ -7,6 +7,10 @@ if PYTHON3:
intern = sys.intern
class SyntaxTree(spark_AST):
def __init__(self, *args, **kwargs):
    """Initialize the node through the base class and mark it untransformed.

    All positional and keyword arguments are forwarded unchanged to the
    parent constructor.
    """
    super(SyntaxTree, self).__init__(*args, **kwargs)
    # None means "not rewritten".  The tree printer treats True as a bare
    # "(transformed)" marker and formats any other value with %s.
    self.transformed_by = None
def isNone(self):
"""An SyntaxTree None token. We can't use regular list comparisons
because SyntaxTree token offsets might be different"""
@@ -23,6 +27,11 @@ class SyntaxTree(spark_AST):
if len(self) > 1:
rv += " (%d)" % (len(self))
enumerate_children = True
if self.transformed_by is not None:
if self.transformed_by is True:
rv += " (transformed)"
else:
rv += " (transformed by %s)" % self.transformed_by
rv = indent + rv
indent += ' '
i = 0

View File

@@ -146,8 +146,13 @@ from uncompyle6.semantics.helper import (
find_globals_and_nonlocals,
flatten_list,
)
from uncompyle6.scanners.tok import Token
from uncompyle6.semantics.transform import (
is_docstring,
TreeTransform,
)
from uncompyle6.semantics.consts import (
LINE_LENGTH,
RETURN_LOCALS,
@@ -176,13 +181,6 @@ else:
from StringIO import StringIO
def is_docstring(node):
try:
return node[0][0].kind == "assign" and node[0][0][1][0].pattr == "__doc__"
except:
return False
class SourceWalkerError(Exception):
def __init__(self, errmsg):
self.errmsg = errmsg
@@ -230,6 +228,7 @@ class SourceWalker(GenericASTTraversal, object):
"""
GenericASTTraversal.__init__(self, ast=None)
self.scanner = scanner
params = {"f": out, "indent": ""}
self.version = version
@@ -239,6 +238,8 @@ class SourceWalker(GenericASTTraversal, object):
compile_mode=compile_mode,
is_pypy=is_pypy,
)
self.treeTransform = TreeTransform(version, showast)
self.debug_parser = dict(debug_parser)
self.showast = showast
self.params = params
@@ -277,6 +278,19 @@ class SourceWalker(GenericASTTraversal, object):
return
def maybe_show_tree(self, ast):
    """Print *ast* when tree display was requested.

    When both this walker and its tree transformer have tree display
    enabled, a separator banner is printed first.  The tree itself is then
    handed to the module-level ``maybe_show_tree`` helper.

    :param ast: the syntax tree to show.
    """
    if self.showast and self.treeTransform.showast:
        self.println(
            """
---- end before transform
---- begin after transform
"""
            + " "
        )

    # The previous test was "... and self.showast.get": a bound method that
    # was never called, hence always true.  Use the same non-empty-dict test
    # as TreeTransform.maybe_show_tree.  Output is unchanged, because the
    # module-level helper prints nothing for a falsy showast anyway.
    if isinstance(self.showast, dict) and self.showast:
        maybe_show_tree(self, ast)
def str_with_template(self, ast):
stream = sys.stdout
stream.write(self.str_with_template1(ast, "", None))
@@ -299,6 +313,13 @@ class SourceWalker(GenericASTTraversal, object):
key = key[i]
pass
if ast.transformed_by is not None:
if ast.transformed_by is True:
rv += " transformed"
else:
rv += " transformed by %s" % ast.transformed_by
pass
pass
if key.kind in table:
rv += ": %s" % str(table[key.kind])
@@ -306,6 +327,7 @@ class SourceWalker(GenericASTTraversal, object):
indent += " "
i = 0
for node in ast:
if hasattr(node, "__repr1__"):
if enumerate_children:
child = self.str_with_template1(node, indent, i)
@@ -685,89 +707,6 @@ class SourceWalker(GenericASTTraversal, object):
self.println()
self.prune() # stop recursing
# preprocess is used for handling chains of
# if elif elif
def n_ifelsestmt(self, node, preprocess=False):
"""
Here we turn:
if ...
else
if ..
into:
if ..
elif ...
[else ...]
where appropriate
"""
else_suite = node[3]
n = else_suite[0]
old_stmts = None
if len(n) == 1 == len(n[0]) and n[0] == "stmt":
n = n[0][0]
elif n[0].kind in ("lastc_stmt", "lastl_stmt"):
n = n[0]
if n[0].kind in (
"ifstmt",
"iflaststmt",
"iflaststmtl",
"ifelsestmtl",
"ifelsestmtc",
):
# This seems needed for Python 2.5-2.7
n = n[0]
pass
pass
elif len(n) > 1 and 1 == len(n[0]) and n[0] == "stmt" and n[1].kind == "stmt":
else_suite_stmts = n[0]
if else_suite_stmts[0].kind not in ("ifstmt", "iflaststmt", "ifelsestmtl"):
if not preprocess:
self.default(node)
return
old_stmts = n
n = else_suite_stmts[0]
else:
if not preprocess:
self.default(node)
return
if n.kind in ("ifstmt", "iflaststmt", "iflaststmtl"):
node.kind = "ifelifstmt"
n.kind = "elifstmt"
elif n.kind in ("ifelsestmtr",):
node.kind = "ifelifstmt"
n.kind = "elifelsestmtr"
elif n.kind in ("ifelsestmt", "ifelsestmtc", "ifelsestmtl"):
node.kind = "ifelifstmt"
self.n_ifelsestmt(n, preprocess=True)
if n == "ifelifstmt":
n.kind = "elifelifstmt"
elif n.kind in ("ifelsestmt", "ifelsestmtc", "ifelsestmtl"):
n.kind = "elifelsestmt"
if not preprocess:
if old_stmts:
if n.kind == "elifstmt":
trailing_else = SyntaxTree("stmts", old_stmts[1:])
# We use elifelsestmtr because it has 3 nodes
elifelse_stmt = SyntaxTree(
"elifelsestmtr", [n[0], n[1], trailing_else]
)
node[3] = elifelse_stmt
pass
else:
# Other cases for n.kind may happen here
return
pass
self.default(node)
n_ifelsestmtc = n_ifelsestmtl = n_ifelsestmt
def n_ifelsestmtr(self, node):
if node[2] == "COME_FROM":
return_stmts_node = node[3]
@@ -899,17 +838,19 @@ class SourceWalker(GenericASTTraversal, object):
def n_mkfunc(self, node):
if self.version >= 3.3 or node[-2] in ("kwargs", "no_kwargs"):
# LOAD_CONST code object ..
# LOAD_CONST 'x0' if >= 3.3
# LOAD_CODET code object ..
# LOAD_CONST "x0" if >= 3.3
# MAKE_FUNCTION ..
code_node = node[-3]
elif node[-2] == "expr":
code_node = node[-2][0]
else:
# LOAD_CONST code object ..
# LOAD_CODE code object ..
# MAKE_FUNCTION ..
code_node = node[-2]
assert iscode(code_node.attr)
func_name = code_node.attr.co_name
self.write(func_name)
@@ -930,6 +871,75 @@ class SourceWalker(GenericASTTraversal, object):
else:
make_function2(self, node, is_lambda, nested, code_node)
# Emit the docstring held in node[0].pattr as a triple-quoted string literal
# using self.write()/self.println(), then call self.prune().
def n_docstring(self, node):
indent = self.indent
docstring = node[0].pattr
# Prefer """ as the delimiter; switch to ''' only when the text contains
# """ and does not contain '''.
quote = '"""'
if docstring.find(quote) >= 0:
if docstring.find("'''") == -1:
quote = "'''"
self.write(indent)
# expandtabs() leaves no literal tab in the text; repr() then escapes it and
# [1:-1] drops the first and last character of that repr.
docstring = repr(docstring.expandtabs())[1:-1]
# Undo the escapes repr() introduced.  An escaped backslash is replaced by a
# tab, which serves as a placeholder until it is turned back into
# backslashes further down.
for (orig, replace) in (('\\\\', '\t'),
('\\r\\n', '\n'),
('\\n', '\n'),
('\\r', '\n'),
('\\"', '"'),
("\\'", "'")):
docstring = docstring.replace(orig, replace)
# Do a raw string if there are backslashes but no other escaped characters:
# also check some edge cases
# ('\t' below is the backslash placeholder; a remaining '\\' means some other
# escape sequence is still present.)
if ('\t' in docstring
and '\\' not in docstring
and len(docstring) >= 2
and docstring[-1] != '\t'
and (docstring[-1] != '"'
or docstring[-2] == '\t')):
self.write('r') # raw string
# Restore backslashes unescaped since raw
docstring = docstring.replace('\t', '\\')
else:
# Escape the last character if it is the same as the
# triple quote character.
quote1 = quote[-1]
if len(docstring) and docstring[-1] == quote1:
docstring = docstring[:-1] + '\\' + quote1
# Escape triple quote when needed
if quote == '"""':
replace_str = '\\"""'
else:
assert quote == "'''"
replace_str = "\\'''"
docstring = docstring.replace(quote, replace_str)
# Placeholder tabs become two backslash characters (an escaped backslash).
docstring = docstring.replace('\t', '\\\\')
lines = docstring.split('\n')
self.write(quote)
# str.split('\n') always returns at least one element, so the
# len(lines) == 0 branch is not expected to run.
if len(lines) == 0:
self.println(quote)
elif len(lines) == 1:
self.println(lines[0], quote)
else:
self.println(lines[0])
# Interior lines: a non-empty line is printed as is; an empty one is
# passed to println() as the literal "\n\n".
for line in lines[1:-1]:
if line:
self.println( line )
else:
self.println( "\n\n" )
pass
pass
self.println(lines[-1], quote)
self.prune()
def n_mklambda(self, node):
self.make_function(node, is_lambda=True, code_node=node[-2])
self.prune() # stop recursing
@@ -1816,6 +1826,19 @@ class SourceWalker(GenericASTTraversal, object):
lastnode = node.pop()
lastnodetype = lastnode.kind
# If this build list is inside a CALL_FUNCTION_VAR,
# then the first * has already been printed.
# Until I have a better way to check for CALL_FUNCTION_VAR,
# will assume that if the text ends in *.
last_was_star = self.f.getvalue().endswith("*")
if lastnodetype.endswith("UNPACK"):
# FIXME: need to handle range of BUILD_LIST_UNPACK
have_star = True
# endchar = ''
else:
have_star = False
if lastnodetype.startswith("BUILD_LIST"):
self.write("[")
endchar = "]"
@@ -1866,6 +1889,13 @@ class SourceWalker(GenericASTTraversal, object):
else:
if sep != "":
sep += " "
if not last_was_star:
if have_star:
sep += "*"
pass
pass
else:
last_was_star = False
self.write(sep, value)
sep = ","
if lastnode.attr == 1 and lastnodetype.startswith("BUILD_TUPLE"):
@@ -2219,6 +2249,10 @@ class SourceWalker(GenericASTTraversal, object):
code._tokens = None # save memory
assert ast == "stmts"
if ast[0] == "docstring":
self.println(self.traverse(ast[0]))
del ast[0]
first_stmt = ast[0][0]
if 3.0 <= self.version <= 3.3:
try:
@@ -2364,8 +2398,10 @@ class SourceWalker(GenericASTTraversal, object):
self.p.insts = p_insts
except (python_parser.ParserError, AssertionError) as e:
raise ParserError(e, tokens)
maybe_show_tree(self, ast)
return ast
transform_ast = self.treeTransform.transform(ast)
self.maybe_show_tree(ast)
del ast # Save memory
return transform_ast
# The bytecode for the end of the main routine has a
# "return None". However you can't issue a "return" statement in
@@ -2397,11 +2433,15 @@ class SourceWalker(GenericASTTraversal, object):
except (python_parser.ParserError, AssertionError) as e:
raise ParserError(e, tokens)
maybe_show_tree(self, ast)
checker(ast, False, self.ast_errors)
return ast
self.customize(customize)
transform_ast = self.treeTransform.transform(ast)
self.maybe_show_tree(ast)
del ast # Save memory
return transform_ast
@classmethod
def _get_mapping(cls, node):

View File

@@ -0,0 +1,239 @@
# Copyright (c) 2019 by Rocky Bernstein
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from uncompyle6.show import maybe_show_tree
from copy import copy
from spark_parser import GenericASTTraversal, GenericASTTraversalPruningException
from uncompyle6.parsers.treenode import SyntaxTree
from uncompyle6.scanners.tok import Token
from uncompyle6.semantics.consts import RETURN_NONE
def is_docstring(node):
    """Return True when *node* has the shape of an assignment to ``__doc__``.

    The test is ``node[0][0].kind == "assign"`` together with
    ``node[0][0][1][0].pattr == "__doc__"``.  A node that does not have that
    shape (too short, not subscriptable, missing attributes, ...) gives False.
    """
    try:
        stmt = node[0][0]
        return stmt.kind == "assign" and stmt[1][0].pattr == "__doc__"
    except Exception:
        # Any lookup failure just means "not a docstring".  Unlike a bare
        # "except:", this lets KeyboardInterrupt and SystemExit propagate.
        return False
class TreeTransform(GenericASTTraversal, object):
    """Rewrite parts of a parse tree into more idiomatic equivalents.

    Two rewrites are implemented:

    * an ``if`` whose body is a single ``raise`` becomes an ``assert`` or
      ``assert2`` node (``n_ifstmt``);
    * ``if ... else: if ...`` chains become ``if ... elif ...``
      (``n_ifelsestmt``).

    Nodes produced or changed by a rewrite get ``transformed_by`` set to the
    name of the handler, as a string.
    """

    def __init__(self, version, show_ast=None):
        """
        :param version: stored as ``self.version``.
        :param show_ast: tree-display options; the tree is shown only when
                         this is a non-empty dict.
        """
        self.version = version
        self.showast = show_ast

    def maybe_show_tree(self, ast):
        """Show *ast* via the module-level helper if display was requested."""
        if isinstance(self.showast, dict) and self.showast:
            maybe_show_tree(self, ast)

    def preorder(self, node=None):
        """Walk the tree in roughly 'preorder' (a bit of a lie explained below).
        For each node with typestring name *name* if the
        node has a method called n_*name*, call that before walking
        children.

        In typical use a node with children can call "preorder" in any
        order it wants which may skip children or order them in ways
        other than first to last.  In fact, this happens.  So in
        this sense this function is not strictly preorder.

        The handler's return value replaces the node, and each child is
        replaced by the result of walking it.  Returns the (possibly new)
        node.
        """
        if node is None:
            node = self.ast

        try:
            name = "n_" + self.typestring(node)
            if hasattr(self, name):
                func = getattr(self, name)
                node = func(node)
        except GenericASTTraversalPruningException:
            # Pruning means "do not descend"; the node itself is kept.
            # Returning None here would make the caller's
            # "node[i] = self.preorder(kid)" overwrite the child with None.
            return node

        for i, kid in enumerate(node):
            node[i] = self.preorder(kid)
        return node

    def n_ifstmt(self, node):
        """Here we check if we can turn an `ifstmt` or `iflaststmtl` into
        some kind of `assert` statement.

        Returns the replacement ``assert``/``assert2`` node, or *node*
        itself when the pattern does not match.
        """
        testexpr = node[0]

        if testexpr.kind != "testexpr":
            return node
        if node.kind == "ifstmt":
            ifstmts_jump = node[1]
            if node[1] != "_ifstmts_jump":
                return node
            stmts = ifstmts_jump[0]
        else:
            # iflaststmtl works this way
            stmts = node[1]

        if stmts in ("c_stmts",) and len(stmts) == 1:
            stmt = stmts[0]
            raise_stmt = stmt[0]
            if raise_stmt == "raise_stmt1" and len(testexpr[0]) == 2:
                assert_expr = testexpr[0][0]
                assert_expr.kind = "assert_expr"
                jmp_true = testexpr[0][1]
                expr = raise_stmt[0]
                RAISE_VARARGS_1 = raise_stmt[1]
                if expr[0] == "call":
                    # ifstmt
                    #     0. testexpr
                    #         testtrue (2)
                    #             0. expr
                    #     1. _ifstmts_jump (2)
                    #         0. c_stmts
                    #             stmt
                    #                 raise_stmt1 (2)
                    #                     0. expr
                    #                         call (3)
                    #                     1. RAISE_VARARGS_1
                    # becomes:
                    # assert2 ::= assert_expr jmp_true LOAD_ASSERT expr RAISE_VARARGS_1 COME_FROM
                    call = expr[0]
                    LOAD_ASSERT = call[0]
                    expr = call[1][0]
                    node = SyntaxTree(
                        "assert2",
                        [assert_expr, jmp_true, LOAD_ASSERT, expr, RAISE_VARARGS_1],
                    )
                else:
                    # ifstmt
                    #     0. testexpr (2)
                    #         testtrue
                    #             0. expr
                    #     1. _ifstmts_jump (2)
                    #         0. c_stmts
                    #             stmts
                    #                 raise_stmt1 (2)
                    #                     0. expr
                    #                         LOAD_ASSERT
                    #                     1. RAISE_VARARGS_1
                    # becomes:
                    # assert ::= assert_expr jmp_true LOAD_ASSERT RAISE_VARARGS_1 COME_FROM
                    LOAD_ASSERT = expr[0]
                    node = SyntaxTree(
                        "assert",
                        [assert_expr, jmp_true, LOAD_ASSERT, RAISE_VARARGS_1],
                    )
                # A plain string, as in n_ifelsestmt.  A trailing comma here
                # would store the 1-tuple ("n_ifstmt",) instead.
                node.transformed_by = "n_ifstmt"
        return node

    n_iflaststmtl = n_ifstmt

    # preprocess is used for handling chains of
    # if elif elif
    def n_ifelsestmt(self, node, preprocess=False):
        """
        Here we turn:

          if ...
          else
             if ..

        into:

          if ..
          elif ...

          [else ...]

        where appropriate.

        With ``preprocess=True`` (the recursive call used for chains) only
        the node kinds are updated; the caller ignores the return value.
        """
        else_suite = node[3]

        n = else_suite[0]
        old_stmts = None

        if len(n) == 1 == len(n[0]) and n[0] == "stmt":
            n = n[0][0]
        elif n[0].kind in ("lastc_stmt", "lastl_stmt"):
            n = n[0]
            if n[0].kind in (
                "ifstmt",
                "iflaststmt",
                "iflaststmtl",
                "ifelsestmtl",
                "ifelsestmtc",
            ):
                # This seems needed for Python 2.5-2.7
                n = n[0]
        elif len(n) > 1 and 1 == len(n[0]) and n[0] == "stmt" and n[1].kind == "stmt":
            else_suite_stmts = n[0]
            if else_suite_stmts[0].kind not in ("ifstmt", "iflaststmt", "ifelsestmtl"):
                return node
            old_stmts = n
            n = else_suite_stmts[0]
        else:
            return node

        if n.kind in ("ifstmt", "iflaststmt", "iflaststmtl"):
            node.kind = "ifelifstmt"
            n.kind = "elifstmt"
        elif n.kind in ("ifelsestmtr",):
            node.kind = "ifelifstmt"
            n.kind = "elifelsestmtr"
        elif n.kind in ("ifelsestmt", "ifelsestmtc", "ifelsestmtl"):
            node.kind = "ifelifstmt"
            self.n_ifelsestmt(n, preprocess=True)
            if n == "ifelifstmt":
                n.kind = "elifelifstmt"
            elif n.kind in ("ifelsestmt", "ifelsestmtc", "ifelsestmtl"):
                n.kind = "elifelsestmt"

        if not preprocess:
            if old_stmts:
                if n.kind == "elifstmt":
                    trailing_else = SyntaxTree("stmts", old_stmts[1:])
                    # We use elifelsestmtr because it has 3 nodes
                    elifelse_stmt = SyntaxTree(
                        "elifelsestmtr", [n[0], n[1], trailing_else]
                    )
                    node[3] = elifelse_stmt
                else:
                    # Other cases for n.kind may happen here
                    pass
            node.transformed_by = "n_ifelsestmt"
            return node

    n_ifelsestmtc = n_ifelsestmtl = n_ifelsestmt

    def traverse(self, node, is_lambda=False):
        """Run ``preorder`` on *node* and return the result.

        *is_lambda* is accepted but not used here.
        """
        node = self.preorder(node)
        return node

    def transform(self, ast):
        """Return a transformed version of *ast*.

        The tree is shown first (if requested), then a shallow copy is
        walked.  If the last node of the result equals ``RETURN_NONE`` it
        is removed.
        """
        self.maybe_show_tree(ast)
        self.ast = copy(ast)
        self.ast = self.traverse(self.ast, is_lambda=False)

        if self.ast[-1] == RETURN_NONE:
            self.ast.pop()  # remove last node
            # todo: if empty, add 'pass'

        return self.ast
# Write template_engine
# def template_engine

View File

@@ -26,10 +26,10 @@ def maybe_show_asm(showasm, tokens):
:param tokens: The asm tokens to show.
"""
if showasm:
stream = showasm if hasattr(showasm, 'write') else sys.stdout
stream = showasm if hasattr(showasm, "write") else sys.stdout
for t in tokens:
stream.write(str(t))
stream.write('\n')
stream.write("\n")
def maybe_show_tree(walker, ast):
@@ -43,15 +43,16 @@ def maybe_show_tree(walker, ast):
:param ast: The ast to show.
"""
if walker.showast:
if hasattr(walker.showast, 'write'):
if hasattr(walker.showast, "write"):
stream = walker.showast
else:
stream = sys.stdout
if walker.showast == 'Full':
if (isinstance(walker.showast, dict) and walker.showast.get("Full", False)
and hasattr(walker, "str_with_template")):
walker.str_with_template(ast)
else:
stream.write(str(ast))
stream.write('\n')
stream.write("\n")
def maybe_show_tree_param_default(show_tree, name, default):
@@ -68,11 +69,11 @@ def maybe_show_tree_param_default(show_tree, name, default):
:param default: The function parameter default.
"""
if show_tree:
stream = show_tree if hasattr(show_tree, 'write') else sys.stdout
stream.write('\n')
stream.write('--' + name)
stream.write('\n')
stream = show_tree if hasattr(show_tree, "write") else sys.stdout
stream.write("\n")
stream.write("--" + name)
stream.write("\n")
stream.write(str(default))
stream.write('\n')
stream.write('--')
stream.write('\n')
stream.write("\n")
stream.write("--")
stream.write("\n")