Merge pull request #69 from rocky/ast-reduce-checks

AST reduce checks
This commit is contained in:
R. Bernstein
2016-11-27 14:12:08 -05:00
committed by GitHub
16 changed files with 139 additions and 44 deletions

View File

@@ -37,7 +37,7 @@ entry_points={
'pydisassemble=uncompyle6.bin.pydisassemble:main',
]}
ftp_url = None
install_requires = ['spark-parser >= 1.4.3, < 1.5.0',
install_requires = ['spark-parser >= 1.5.0, < 1.6.0',
'xdis >= 3.2.3, < 3.3.0']
license = 'MIT'
mailing_list = 'python-debugger@googlegroups.com'

View File

@@ -104,7 +104,7 @@ check-bytecode-2.6:
#: Check deparsing Python 2.7
check-bytecode-2.7:
$(PYTHON) test_pythonlib.py --bytecode-2.7
$(PYTHON) test_pythonlib.py --bytecode-2.7 --verify
#: Check deparsing Python 3.0
check-bytecode-3.0:

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,18 @@
# Bug was using "continue" fouling up the 1st elif, by confusing
# the "pass" for "continue" through not recognizing the "if" jump
# around it. We fixed this by ignoring what's done in Python 2.7.
# A better fix would be improved detection of control structures.
#
# NOTE: minimized decompiler test fixture (adapted from sre_compile's
# _compile_charset); the exact statement layout produces the bytecode
# pattern under test, so the code must not be restructured.
def _compile_charset(charset, flags, code, fixup=None):
    # compile charset subprogram
    emit = code.append
    if fixup is None:
        # Placeholder default; the real sre_compile uses an identity
        # function here. Calling emit(fixup(av)) with fixup == 1 would
        # raise TypeError -- presumably that branch is never executed,
        # since this file only needs to compile/decompile, not run.
        fixup = 1
    for op, av in charset:
        if op is flags:
            pass
        elif op is code:
            emit(fixup(av))
        else:
            raise RuntimeError
    emit(5)

View File

@@ -0,0 +1,8 @@
# Bug from 3.4 threading. Bug is in handling while/else.
#
# NOTE: minimized decompiler test fixture -- the while/else shape is
# exactly what is under test; do not restructure this function.
def acquire(self):
    with self._cond:
        # "else" of a while loop runs when the loop condition becomes
        # false (there is no "break" here), so "rc" is always bound
        # before the return.
        while self:
            rc = False
        else:
            rc = True
        return rc

View File

@@ -189,17 +189,16 @@ def main(in_base, out_base, files, codes, outfile=None,
print(e)
verify_failed_files += 1
os.rename(outfile, outfile + '_unverified')
sys.stderr.write("### Error Verifying %s\n" % filename)
sys.stderr.write(str(e) + "\n")
if not outfile:
print("### Error Verifiying %s" % filename, file=sys.stderr)
print(e, file=sys.stderr)
if raise_on_error:
raise
pass
pass
pass
elif do_verify:
print("\n### uncompile successful, but no file to compare against",
file=sys.stderr)
sys.stderr.write("\n### uncompile successful, but no file to compare against\n")
pass
else:
okay_files += 1

View File

@@ -69,6 +69,25 @@ class PythonParser(GenericASTBuilder):
for i in dir(self):
setattr(self, i, None)
def debug_reduce(self, rule, tokens, parent, i):
    """Customized format and print for our kind of tokens
    which gets called in debugging grammar reduce rules.

    Prints the reduction rule (lhs ::= rhs...), prefixed with the
    parent token's source line number and bytecode offset when the
    token carries those attributes.
    """
    prefix = ''
    if parent and tokens:
        p_token = tokens[parent]
        # Show "L.nnn:" when the parent token starts a source line.
        if hasattr(p_token, 'linestart') and p_token.linestart:
            prefix = 'L.%3d: ' % p_token.linestart
        else:
            # NOTE(review): padding widths in these literal strings may
            # have been collapsed by extraction -- confirm alignment
            # against the repository source.
            prefix = ' '
        # Append the instruction offset when the token has one.
        if hasattr(p_token, 'offset'):
            prefix += "%3s " % str(p_token.offset)
            prefix += " "
    else:
        prefix = ' '
    print("%s%s ::= %s" % (prefix, rule[0], ' '.join(rule[1])))
def error(self, instructions, index):
# Find the last line boundary
for start in range(index, -1, -1):
@@ -466,6 +485,8 @@ class PythonParser(GenericASTBuilder):
_mklambda ::= load_closure mklambda
_mklambda ::= mklambda
# "and" where the first part of the and is true,
# so there is only the 2nd part to evaluate
and2 ::= _jump jmp_false COME_FROM expr COME_FROM
expr ::= conditional

View File

@@ -241,7 +241,7 @@ class Python2Parser(PythonParser):
"""
def add_custom_rules(self, tokens, customize):
'''
"""
Special handling for opcodes such as those that take a variable number
of arguments -- we add a new rule for each:
@@ -260,7 +260,7 @@ class Python2Parser(PythonParser):
expr ::= expr {expr}^n CALL_FUNCTION_KW_n POP_TOP
PyPy adds custom rules here as well
'''
"""
for opname, v in list(customize.items()):
opname_base = opname[:opname.rfind('_')]
if opname == 'PyPy':
@@ -389,6 +389,26 @@ class Python2Parser(PythonParser):
else:
raise Exception('unknown customize token %s' % opname)
self.add_unique_rule(rule, opname_base, v, customize)
pass
self.check_reduce['augassign1'] = 'AST'
self.check_reduce['augassign2'] = 'AST'
self.check_reduce['_stmts'] = 'AST'
return
def reduce_is_invalid(self, rule, ast, tokens, first, last):
    """Check whether a proposed grammar reduction should be rejected.

    Called by the parser for nonterminals registered in
    self.check_reduce (here: augassign1, augassign2, _stmts).
    Returns True when the reduction by *rule* to *ast* is invalid.
    """
    lhs = rule[0]
    # Reject augmented-assign reductions whose first expression parsed
    # as "and" -- presumably an artifact of a misparse; TODO confirm
    # against the grammar rules for augassign1/augassign2.
    if lhs in ('augassign1', 'augassign2') and ast[0][0] == 'and':
        return True
    elif lhs == '_stmts':
        # A return_stmt may only be the *last* statement in _stmts;
        # anywhere else the reduction is invalid.
        for i, stmt in enumerate(ast):
            if stmt == '_stmts':
                # Unwrap a nested _stmts node to its first child.
                stmt = stmt[0]
            assert stmt == 'stmt'
            if stmt[0] == 'return_stmt':
                return i+1 != len(ast)
            pass
        return False
    return False
class Python2ParserSingle(Python2Parser, PythonParserSingle):
pass

View File

@@ -146,8 +146,6 @@ class Python3Parser(PythonParser):
ifelsestmtr ::= testexpr return_if_stmts return_stmts
ifelsestmtl ::= testexpr c_stmts_opt JUMP_BACK else_suitel
ifelsestmtl ::= testexpr c_stmts_opt JUMP_BACK else_suitel JUMP_BACK COME_FROM_LOOP
ifelsestmtl ::= testexpr c_stmts_opt JUMP_BACK else_suitel COME_FROM_LOOP
# FIXME: this feels like a hack. Is it just 1 or two
@@ -335,11 +333,12 @@ class Python3Parser(PythonParser):
whilestmt ::= SETUP_LOOP testexpr return_stmts POP_BLOCK
COME_FROM_LOOP
while1elsestmt ::= SETUP_LOOP l_stmts JUMP_BACK
else_suite
whileelsestmt ::= SETUP_LOOP testexpr l_stmts_opt JUMP_BACK POP_BLOCK
else_suite COME_FROM_LOOP
while1elsestmt ::= SETUP_LOOP l_stmts JUMP_BACK
else_suite
whileelselaststmt ::= SETUP_LOOP testexpr l_stmts_opt JUMP_BACK POP_BLOCK
else_suitec COME_FROM_LOOP
@@ -348,6 +347,7 @@ class Python3Parser(PythonParser):
# FIXME: Python 3.? starts adding branch optimization? Put this starting there.
while1stmt ::= SETUP_LOOP l_stmts
while1stmt ::= SETUP_LOOP l_stmts COME_FROM_LOOP
# FIXME: investigate - can code really produce a NOP?
whileTruestmt ::= SETUP_LOOP l_stmts_opt JUMP_BACK NOP
@@ -680,8 +680,30 @@ class Python3Parser(PythonParser):
rule = ('mkfunc ::= %sload_closure LOAD_CONST %s'
% ('expr ' * args_pos, opname))
self.add_unique_rule(rule, opname, token.attr, customize)
pass
self.check_reduce['augassign1'] = 'AST'
self.check_reduce['augassign2'] = 'AST'
self.check_reduce['while1stmt'] = 'noAST'
return
def reduce_is_invalid(self, rule, ast, tokens, first, last):
    """Check whether a proposed grammar reduction should be rejected.

    Called by the parser for nonterminals registered in
    self.check_reduce (here: augassign1, augassign2, while1stmt).
    Returns True when the reduction by *rule* to *ast* is invalid.
    """
    lhs = rule[0]
    # Reject augmented-assign reductions whose first expression parsed
    # as "and" -- presumably an artifact of a misparse; TODO confirm
    # against the grammar rules for augassign1/augassign2.
    if lhs in ('augassign1', 'augassign2') and ast[0][0] == 'and':
        return True
    elif lhs == 'while1stmt':
        if tokens[last] in ('COME_FROM_LOOP', 'JUMP_BACK'):
            # jump_back should be right after SETUP_LOOP. Test?
            last += 1
        # Skip pseudo-tokens whose offset is a string (e.g. COME_FROM
        # markers) to reach the next real instruction.
        while last < len(tokens) and isinstance(tokens[last].offset, str):
            last += 1
        if last < len(tokens):
            offset = tokens[last].offset
            assert tokens[first] == 'SETUP_LOOP'
            # The instruction after the loop body must sit at the
            # SETUP_LOOP's jump target; otherwise this while1stmt
            # span is wrong.
            if offset != tokens[first].attr:
                return True
        return False
    return False
class Python30Parser(Python3Parser):
def p_30(self, args):

View File

@@ -166,9 +166,9 @@ class Scanner2(scan.Scanner):
# continue
# last_offset = jump_offset
come_from_name = 'COME_FROM'
opname = self.opc.opname[self.code[jump_offset]]
if opname.startswith('SETUP_') and self.version == 2.7:
come_from_type = opname[len('SETUP_'):]
op_name = self.opc.opname[self.code[jump_offset]]
if op_name.startswith('SETUP_') and self.version == 2.7:
come_from_type = op_name[len('SETUP_'):]
if come_from_type not in ('LOOP', 'EXCEPT'):
come_from_name = 'COME_FROM_%s' % come_from_type
pass
@@ -179,7 +179,7 @@ class Scanner2(scan.Scanner):
jump_idx += 1
op = self.code[offset]
opname = self.opc.opname[op]
op_name = self.opc.opname[op]
oparg = None; pattr = None
has_arg = op_has_argument(op, self.opc)
@@ -194,14 +194,14 @@ class Scanner2(scan.Scanner):
if iscode(const):
oparg = const
if const.co_name == '<lambda>':
assert opname == 'LOAD_CONST'
opname = 'LOAD_LAMBDA'
assert op_name == 'LOAD_CONST'
op_name = 'LOAD_LAMBDA'
elif const.co_name == '<genexpr>':
opname = 'LOAD_GENEXPR'
op_name = 'LOAD_GENEXPR'
elif const.co_name == '<dictcomp>':
opname = 'LOAD_DICTCOMP'
op_name = 'LOAD_DICTCOMP'
elif const.co_name == '<setcomp>':
opname = 'LOAD_SETCOMP'
op_name = 'LOAD_SETCOMP'
# verify() uses 'pattr' for comparison, since 'attr'
# now holds Code(const) and thus can not be used
# for comparison (todo: think about changing this)
@@ -237,20 +237,20 @@ class Scanner2(scan.Scanner):
self.code[self.prev[offset]] == self.opc.LOAD_CLOSURE:
continue
else:
if self.is_pypy and not oparg and opname == 'BUILD_MAP':
opname = 'BUILD_MAP_n'
if self.is_pypy and not oparg and op_name == 'BUILD_MAP':
op_name = 'BUILD_MAP_n'
else:
opname = '%s_%d' % (opname, oparg)
op_name = '%s_%d' % (op_name, oparg)
if op != self.opc.BUILD_SLICE:
customize[opname] = oparg
elif self.is_pypy and opname in ('LOOKUP_METHOD',
customize[op_name] = oparg
elif self.is_pypy and op_name in ('LOOKUP_METHOD',
'JUMP_IF_NOT_DEBUG',
'SETUP_EXCEPT',
'SETUP_FINALLY'):
# The value in the dict is in special cases in semantic actions, such
# as CALL_FUNCTION. The value is not used in these cases, so we put
# in arbitrary value 0.
customize[opname] = 0
customize[op_name] = 0
elif op == self.opc.JUMP_ABSOLUTE:
# Further classify JUMP_ABSOLUTE into backward jumps
# which are used in loops, and "CONTINUE" jumps which
@@ -269,16 +269,16 @@ class Scanner2(scan.Scanner):
and self.code[offset+3] not in (self.opc.END_FINALLY,
self.opc.POP_BLOCK)
and offset not in self.not_continue):
opname = 'CONTINUE'
op_name = 'CONTINUE'
else:
opname = 'JUMP_BACK'
op_name = 'JUMP_BACK'
elif op == self.opc.LOAD_GLOBAL:
if offset in self.load_asserts:
opname = 'LOAD_ASSERT'
op_name = 'LOAD_ASSERT'
elif op == self.opc.RETURN_VALUE:
if offset in self.return_end_ifs:
opname = 'RETURN_END_IF'
op_name = 'RETURN_END_IF'
if offset in self.linestartoffsets:
linestart = self.linestartoffsets[offset]
@@ -287,7 +287,7 @@ class Scanner2(scan.Scanner):
if offset not in replace:
tokens.append(Token(
opname, oparg, pattr, offset, linestart, op,
op_name, oparg, pattr, offset, linestart, op,
has_arg, self.opc))
else:
tokens.append(Token(
@@ -782,6 +782,7 @@ class Scanner2(scan.Scanner):
if offset in self.ignore_if:
return
if self.version == 2.7:
if code[pre[rtarget]] == self.opc.JUMP_ABSOLUTE and pre[rtarget] in self.stmts \
and pre[rtarget] != offset and pre[pre[rtarget]] != offset:
if code[rtarget] == self.opc.JUMP_ABSOLUTE and code[rtarget+3] == self.opc.POP_BLOCK:
@@ -797,6 +798,7 @@ class Scanner2(scan.Scanner):
# Does the "if" jump just beyond a jump op, then this is probably an if statement
pre_rtarget = pre[rtarget]
code_pre_rtarget = code[pre_rtarget]
if code_pre_rtarget in self.jump_forward:
if_end = self.get_target(pre_rtarget)
@@ -824,6 +826,7 @@ class Scanner2(scan.Scanner):
self.structs.append({'type': 'if-then',
'start': start-3,
'end': pre_rtarget})
self.not_continue.add(pre_rtarget)
if rtarget < end:

View File

@@ -233,7 +233,7 @@ class Scanner26(scan.Scanner2):
if op != self.opc.BUILD_SLICE:
customize[op_name] = oparg
elif op == self.opc.JUMP_ABSOLUTE:
# Further classifhy JUMP_ABSOLUTE into backward jumps
# Further classify JUMP_ABSOLUTE into backward jumps
# which are used in loops, and "CONTINUE" jumps which
# may appear in a "continue" statement. The loop-type
# and continue-type jumps will help us classify loop
@@ -254,6 +254,9 @@ class Scanner26(scan.Scanner2):
# if x: continue
# the "continue" is not on a new line.
if tokens[-1].type == 'JUMP_BACK':
# We need 'intern' since we have
# already processed the previous
# token.
tokens[-1].type = intern('CONTINUE')
elif op == self.opc.LOAD_GLOBAL:

View File

@@ -324,9 +324,10 @@ class Scanner3(Scanner):
# FIXME: this is a hack to catch stuff like:
# if x: continue
# the "continue" is not on a new line.
# There are other situations were we don't catch
# There are other situations where we don't catch
# CONTINUE as well.
if tokens[-1].type == 'JUMP_BACK':
if tokens[-1].type == 'JUMP_BACK' and tokens[-1].attr <= argval:
# intern is used because we are changing the *previous* token
tokens[-1].type = intern('CONTINUE')
elif op == self.opc.RETURN_VALUE: