Largish rework: scan while1stmt for jump out ..

to disambiguate.

For this, we use the self.opc JUMP_OPS sets.
For this, we need to store opc in the parse object.

DRY uses of "last = min(last, len(tokens))"
This commit is contained in:
rocky
2020-01-23 13:02:29 -05:00
parent 0f4b791502
commit eeb48818f3
6 changed files with 34 additions and 39 deletions

View File

@@ -33,7 +33,8 @@ from uncompyle6.parsers.reducecheck import (
except_handler_else,
# iflaststmt,
testtrue,
tryelsestmtl3
tryelsestmtl3,
while1stmt
)
from uncompyle6.parsers.treenode import SyntaxTree
from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG
@@ -1520,6 +1521,7 @@ class Python3Parser(PythonParser):
def reduce_is_invalid(self, rule, ast, tokens, first, last):
lhs = rule[0]
n = len(tokens)
last = min(last, n-1)
if lhs in ("aug_assign1", "aug_assign2") and ast[0][0] == "and":
return True
elif lhs == "annotate_tuple":
@@ -1535,10 +1537,11 @@ class Python3Parser(PythonParser):
condition_jump = ast[0].last_child()
if condition_jump.kind.startswith("POP_JUMP_IF"):
condition_jump2 = tokens[min(last - 1, len(tokens) - 1)]
if condition_jump2.kind.startswith("POP_JUMP_IF"):
# If there are two *distinct* condition jumps, they should not jump to the
# same place. Otherwise we have some sort of "and"/"or".
if condition_jump2.kind.startswith("POP_JUMP_IF") and condition_jump != condition_jump2:
return condition_jump.attr == condition_jump2.attr
last = min(last, n-1)
if tokens[last] == "COME_FROM" and tokens[last].off2int() != condition_jump.attr:
return False
@@ -1569,35 +1572,9 @@ class Python3Parser(PythonParser):
return tryelsestmtl3(self, lhs, n, rule, ast, tokens, first, last)
elif lhs == "while1stmt":
# If there is a fall through to the COME_FROM_LOOP, then this is
# not a while 1. So the instruction before should either be a
# JUMP_BACK or the instruction before should not be the target of a
# jump. (Well that last clause i not quite right; that target could be
# from dead code. Ugh. We need a more uniform control flow analysis.)
if last == len(tokens) or tokens[last - 1] == "COME_FROM_LOOP":
cfl = last - 1
else:
cfl = last
assert tokens[cfl] == "COME_FROM_LOOP"
if while1stmt(self, lhs, n, rule, ast, tokens, first, last):
return True
for i in range(cfl - 1, first, -1):
if tokens[i] != "POP_BLOCK":
break
if tokens[i].kind not in ("JUMP_BACK", "RETURN_VALUE", "BREAK_LOOP"):
if not tokens[i].kind.startswith("COME_FROM"):
return True
# Check that the SETUP_LOOP jumps to the offset after the
# COME_FROM_LOOP
# Python 3.0 has additional:
# JUMP_FORWARD here
# COME_FROM
# POP_TOP
# COME_FROM
# here:
# (target of SETUP_LOOP)
# We won't check this.
if self.version == 3.0:
return False

View File

@@ -1105,6 +1105,7 @@ class Python37BaseParser(PythonParser):
def reduce_is_invalid(self, rule, ast, tokens, first, last):
lhs = rule[0]
n = len(tokens)
last = min(last, n-1)
fn = self.reduce_check_table.get(lhs, None)
if fn:
return fn(self, lhs, n, rule, ast, tokens, first, last)

View File

@@ -116,7 +116,7 @@ def ifelsestmt(self, lhs, n, rule, ast, tokens, first, last):
if jf_cf_pop == "jf_cf_pop" and jf_cf_pop[0] == "JUMP_FORWARD":
jump_forward = jf_cf_pop[0]
endif_target = int(jump_forward.pattr)
last_offset = tokens[min(last, n-1)].off2int()
last_offset = tokens[last].off2int()
if endif_target != last_offset:
return True

View File

@@ -46,7 +46,6 @@ def ifstmt(self, lhs, n, rule, ast, tokens, first, last):
if testexpr[0] in ("testtrue", "testfalse"):
test = testexpr[0]
if len(test) > 1 and test[1].kind.startswith("jmp_"):
last = min(last, n-1)
jmp_target = test[1][0].attr
if (
tokens[first].off2int(True)

View File

@@ -14,21 +14,38 @@ def while1stmt(self, lhs, n, rule, ast, tokens, first, last):
cfl = last
assert tokens[cfl] == "COME_FROM_LOOP"
for i in range(cfl - 1, first, -1):
if tokens[i] != "POP_BLOCK":
for loop_end in range(cfl - 1, first, -1):
if tokens[loop_end] != "POP_BLOCK":
break
if tokens[i].kind not in ("JUMP_BACK", "RETURN_VALUE", "RAISE_VARARGS_1"):
if not tokens[i].kind.startswith("COME_FROM"):
if tokens[loop_end].kind not in ("JUMP_BACK", "RETURN_VALUE", "RAISE_VARARGS_1"):
if not tokens[loop_end].kind.startswith("COME_FROM"):
return True
# Check that the SETUP_LOOP jumps to the offset after the
# COME_FROM_LOOP
if 0 <= last < n and tokens[last] in ("COME_FROM_LOOP", "JUMP_BACK"):
if 0 <= last and tokens[last] in ("COME_FROM_LOOP", "JUMP_BACK"):
# jump_back should be right before COME_FROM_LOOP?
last += 1
if last == n:
last -= 1
offset = tokens[last].off2int()
assert tokens[first] == "SETUP_LOOP"
# Scan for jumps out of the loop. Skip the initial "SETUP_LOOP" instruction.
# If there is a JUMP_BACK at the end, jumping to that is not breaking out
# of the loop. However after that, any "POP_BLOCK"s or "COME_FROM_LOOP"s
# are considered to break out of the loop.
if tokens[loop_end] == "JUMP_BACK":
loop_end += 1
loop_end_offset = tokens[loop_end].off2int(prefer_last=False)
for t in range(first+1, loop_end):
token = tokens[t]
# token could be a pseudo-op like "LOAD_STR", which is not in
# self.opc. We will replace that with LOAD_CONST as an
# example of an instruction that is not in self.opc.JUMP_OPS
if self.opc.opmap.get(token.kind, "LOAD_CONST") in self.opc.JUMP_OPS:
if token.attr >= loop_end_offset:
return True
# SETUP_LOOP location must jump either to the last token or the token after the last one
return tokens[first].attr not in (offset, offset + 2)

View File

@@ -2440,6 +2440,7 @@ class SourceWalker(GenericASTTraversal, object):
# modularity is broken here
p_insts = self.p.insts
self.p.insts = self.scanner.insts
self.p.opc = self.scanner.opc
ast = python_parser.parse(self.p, tokens, customize)
self.p.insts = p_insts
except (python_parser.ParserError, AssertionError) as e: