Largish rework: scan while1stmt for jump out ..

to disambiguate. For this, we use the self.opc JUMP_OPS sets, which means we need to store opc in the parse object. DRY uses of "last = min(last, len(tokens))".
2025-08-02 16:44:46 +08:00 · 2020-01-23 13:02:29 -05:00
parent 0f4b791502
commit eeb48818f3
6 changed files with 34 additions and 39 deletions
--- a/uncompyle6/parsers/parse3.py
+++ b/uncompyle6/parsers/parse3.py
@@ -33,7 +33,8 @@ from uncompyle6.parsers.reducecheck import (
    except_handler_else,
    # iflaststmt,
    testtrue,
-    tryelsestmtl3
+    tryelsestmtl3,
+    while1stmt
 )
 from uncompyle6.parsers.treenode import SyntaxTree
 from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG
@@ -1520,6 +1521,7 @@ class Python3Parser(PythonParser):
    def reduce_is_invalid(self, rule, ast, tokens, first, last):
        lhs = rule[0]
        n = len(tokens)
+        last = min(last, n-1)
        if lhs in ("aug_assign1", "aug_assign2") and ast[0][0] == "and":
            return True
        elif lhs == "annotate_tuple":
@@ -1535,10 +1537,11 @@ class Python3Parser(PythonParser):
            condition_jump = ast[0].last_child()
            if condition_jump.kind.startswith("POP_JUMP_IF"):
                condition_jump2 = tokens[min(last - 1, len(tokens) - 1)]
-                if condition_jump2.kind.startswith("POP_JUMP_IF"):
+                # If there are two *distinct* condition jumps, they should not jump to the
+                # same place. Otherwise we have some sort of "and"/"or".
+                if condition_jump2.kind.startswith("POP_JUMP_IF") and condition_jump != condition_jump2:
                    return condition_jump.attr == condition_jump2.attr

-                last = min(last, n-1)
                if tokens[last] == "COME_FROM" and tokens[last].off2int() != condition_jump.attr:
                    return False

@@ -1569,35 +1572,9 @@ class Python3Parser(PythonParser):
            return tryelsestmtl3(self, lhs, n, rule, ast, tokens, first, last)
        elif lhs == "while1stmt":

-            # If there is a fall through to the COME_FROM_LOOP, then this is
-            # not a while 1. So the instruction before should either be a
-            # JUMP_BACK or the instruction before should not be the target of a
-            # jump. (Well that last clause i not quite right; that target could be
-            # from dead code. Ugh. We need a more uniform control flow analysis.)
-            if last == len(tokens) or tokens[last - 1] == "COME_FROM_LOOP":
-                cfl = last - 1
-            else:
-                cfl = last
-            assert tokens[cfl] == "COME_FROM_LOOP"
+            if while1stmt(self, lhs, n, rule, ast, tokens, first, last):
+                return True

-            for i in range(cfl - 1, first, -1):
-                if tokens[i] != "POP_BLOCK":
-                    break
-            if tokens[i].kind not in ("JUMP_BACK", "RETURN_VALUE", "BREAK_LOOP"):
-                if not tokens[i].kind.startswith("COME_FROM"):
-                    return True
-
-            # Check that the SETUP_LOOP jumps to the offset after the
-            # COME_FROM_LOOP
-
-            # Python 3.0 has additional:
-            #     JUMP_FORWARD here
-            #     COME_FROM
-            #     POP_TOP
-            #     COME_FROM
-            #  here:
-            #     (target of SETUP_LOOP)
-            # We won't check this.
            if self.version == 3.0:
                return False

--- a/uncompyle6/parsers/parse37base.py
+++ b/uncompyle6/parsers/parse37base.py
@@ -1105,6 +1105,7 @@ class Python37BaseParser(PythonParser):
    def reduce_is_invalid(self, rule, ast, tokens, first, last):
        lhs = rule[0]
        n = len(tokens)
+        last = min(last, n-1)
        fn = self.reduce_check_table.get(lhs, None)
        if fn:
            return fn(self, lhs, n, rule, ast, tokens, first, last)
--- a/uncompyle6/parsers/reducecheck/ifelsestmt.py
+++ b/uncompyle6/parsers/reducecheck/ifelsestmt.py
@@ -116,7 +116,7 @@ def ifelsestmt(self, lhs, n, rule, ast, tokens, first, last):
            if jf_cf_pop == "jf_cf_pop" and jf_cf_pop[0] == "JUMP_FORWARD":
                jump_forward = jf_cf_pop[0]
                endif_target = int(jump_forward.pattr)
-                last_offset = tokens[min(last, n-1)].off2int()
+                last_offset = tokens[last].off2int()
                if endif_target != last_offset:
                    return True

--- a/uncompyle6/parsers/reducecheck/ifstmt.py
+++ b/uncompyle6/parsers/reducecheck/ifstmt.py
@@ -46,7 +46,6 @@ def ifstmt(self, lhs, n, rule, ast, tokens, first, last):
        if testexpr[0] in ("testtrue", "testfalse"):
            test = testexpr[0]
            if len(test) > 1 and test[1].kind.startswith("jmp_"):
-                last = min(last, n-1)
                jmp_target = test[1][0].attr
                if (
                    tokens[first].off2int(True)
--- a/uncompyle6/parsers/reducecheck/while1stmt.py
+++ b/uncompyle6/parsers/reducecheck/while1stmt.py
@@ -14,21 +14,38 @@ def while1stmt(self, lhs, n, rule, ast, tokens, first, last):
        cfl = last
    assert tokens[cfl] == "COME_FROM_LOOP"

-    for i in range(cfl - 1, first, -1):
-        if tokens[i] != "POP_BLOCK":
+    for loop_end in range(cfl - 1, first, -1):
+        if tokens[loop_end] != "POP_BLOCK":
            break
-    if tokens[i].kind not in ("JUMP_BACK", "RETURN_VALUE", "RAISE_VARARGS_1"):
-        if not tokens[i].kind.startswith("COME_FROM"):
+    if tokens[loop_end].kind not in ("JUMP_BACK", "RETURN_VALUE", "RAISE_VARARGS_1"):
+        if not tokens[loop_end].kind.startswith("COME_FROM"):
            return True
-
    # Check that the SETUP_LOOP jumps to the offset after the
    # COME_FROM_LOOP
-    if 0 <= last < n and tokens[last] in ("COME_FROM_LOOP", "JUMP_BACK"):
+    if 0 <= last and tokens[last] in ("COME_FROM_LOOP", "JUMP_BACK"):
        # jump_back should be right before COME_FROM_LOOP?
        last += 1
    if last == n:
        last -= 1
    offset = tokens[last].off2int()
    assert tokens[first] == "SETUP_LOOP"
+
+    # Scan for jumps out of the loop. Skip the initial "SETUP_LOOP" instruction.
+    # If there is a JUMP_BACK at the end, jumping to that is not breaking out
+    # of the loop. However after that, any "POP_BLOCK"s or "COME_FROM_LOOP"s
+    # are considered to break out of the loop.
+    if tokens[loop_end] == "JUMP_BACK":
+        loop_end += 1
+    loop_end_offset = tokens[loop_end].off2int(prefer_last=False)
+    for t in range(first+1, loop_end):
+        token = tokens[t]
+        # token could be a pseudo-op like "LOAD_STR", which is not in
+        # self.opc.  We will replace that with LOAD_CONST as an
+        # example of an instruction that is not in self.opc.JUMP_OPS
+        if self.opc.opmap.get(token.kind, "LOAD_CONST") in self.opc.JUMP_OPS:
+            if token.attr >= loop_end_offset:
+                return True
+
+
    # SETUP_LOOP location must jump either to the last token or the token after the last one
    return tokens[first].attr not in (offset, offset + 2)
--- a/uncompyle6/semantics/pysource.py
+++ b/uncompyle6/semantics/pysource.py
@@ -2440,6 +2440,7 @@ class SourceWalker(GenericASTTraversal, object):
            # modularity is broken here
            p_insts = self.p.insts
            self.p.insts = self.scanner.insts
+            self.p.opc = self.scanner.opc
            ast = python_parser.parse(self.p, tokens, customize)
            self.p.insts = p_insts
        except (python_parser.ParserError, AssertionError) as e: