Conditional jumps to extended-arg JUMP_BACK...

This is a major reworking, but it leads the way forward to a simpler grammar.
2025-08-02 16:44:46 +08:00 · 2020-01-22 04:49:44 -05:00
parent c6069eb7f8
commit bc50825460
10 changed files with 211 additions and 45 deletions
--- a/pytest/test_token.py
+++ b/pytest/test_token.py
@@ -1,24 +1,25 @@
 from uncompyle6 import PYTHON_VERSION
 from uncompyle6.scanners.tok import Token

+
 def test_token():
    # Test token formatting of: LOAD_CONST None
-    t = Token('LOAD_CONST', offset=0, attr=None, pattr=None, has_arg=True)
-    expect = '           0  LOAD_CONST               None'
+    t = Token("LOAD_CONST", offset=0, attr=None, pattr=None, has_arg=True)
+    expect = "             0  LOAD_CONST               None"
    # print(t.format())
    assert t
    assert t.format() == expect

    # Make sure equality testing of tokens ignores offset
-    t2 = Token('LOAD_CONST', offset=2, attr=None, pattr=None, has_arg=True)
+    t2 = Token("LOAD_CONST", offset=2, attr=None, pattr=None, has_arg=True)
    assert t2 == t

-
    # Make sure formatting of: LOAD_CONST False. We assume False is the 0th index
    # of co_consts.
-    t = Token('LOAD_CONST', offset=1, attr=False, pattr=False, has_arg=True)
-    expect = '           1  LOAD_CONST               False'
+    t = Token("LOAD_CONST", offset=1, attr=False, pattr=False, has_arg=True)
+    expect = "             1  LOAD_CONST               False"
    assert t.format() == expect

-if __name__ == '__main__':
+
+if __name__ == "__main__":
    test_token()
--- a/pytest/testdata/if-2.7.right
+++ b/pytest/testdata/if-2.7.right
@@ -1,12 +1,12 @@
 # Python 2.7
 # Embedded file name: simple_source/branching/05_if.py

-   6       0  LOAD_NAME             0  'True'
-           3  POP_JUMP_IF_FALSE    15  'to 15'
+   6         0  LOAD_NAME             0  'True'
+             3  POP_JUMP_IF_FALSE    15  'to 15'

-   7       6  LOAD_NAME             1  'False'
-           9  STORE_NAME            2  'b'
-          12  JUMP_FORWARD          0  'to 15'
-        15_0  COME_FROM            12  '12'
-          15  LOAD_CONST               None
-          18  RETURN_VALUE     
+   7         6  LOAD_NAME             1  'False'
+             9  STORE_NAME            2  'b'
+            12  JUMP_FORWARD          0  'to 15'
+          15_0  COME_FROM            12  '12'
+            15  LOAD_CONST               None
+            18  RETURN_VALUE     
--- a/pytest/testdata/ifelse-2.7.right
+++ b/pytest/testdata/ifelse-2.7.right
@@ -1,15 +1,15 @@
 # Python 2.7
 # Embedded file name: simple_source/branching/05_ifelse.py

-   3       0  LOAD_NAME             0  'True'
-           3  POP_JUMP_IF_FALSE    15  'to 15'
+   3         0  LOAD_NAME             0  'True'
+             3  POP_JUMP_IF_FALSE    15  'to 15'

-   4       6  LOAD_CONST               1
-           9  STORE_NAME            1  'b'
-          12  JUMP_FORWARD          6  'to 21'
+   4         6  LOAD_CONST               1
+             9  STORE_NAME            1  'b'
+            12  JUMP_FORWARD          6  'to 21'

-   6      15  LOAD_CONST               2
-          18  STORE_NAME            2  'd'
-        21_0  COME_FROM            12  '12'
-          21  LOAD_CONST               None
-          24  RETURN_VALUE     
+   6        15  LOAD_CONST               2
+            18  STORE_NAME            2  'd'
+          21_0  COME_FROM            12  '12'
+            21  LOAD_CONST               None
+            24  RETURN_VALUE     
--- a/test/simple_source/bug36/01_extended_arg.py
+++ b/test/simple_source/bug36/01_extended_arg.py
@@ -1,2 +1,79 @@
 if __file__:
    0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0
+
+
+# From 3.7 test_buffer.py
+# Bug is in dealing with EXTENDED_ARG instructions.
+# In reduction-rule tests where we are testing the offset,
+# getting *which* offset to test against, when there are two
+# possible offsets, can mess us up.
+
+def five(a):
+    return 5
+
+def test_ndarray_slice_multidim(a, f, listerr):
+    for slices in a:
+        shape_t = 1
+        shape_t = 1
+        shape_t = 1
+        shape_t = 1
+        shape_t = 1
+        shape_t = 1
+        shape_t = 1
+        shape_t = 1
+        shape_t = 1
+        shape_t = 1
+        shape_t = 1
+        shape_t = 1
+        shape_t = 1
+        shape_t = 1
+        shape_t = 1
+        shape_t = 1
+        shape_t = 1
+        shape_t = 1
+        shape_t = 1
+        shape_t = 1
+        shape_t = 1
+        shape_t = 1
+        shape_t = 1
+        shape_t = 1
+        shape_t = 1
+        shape_t = 1
+        shape_t = 1
+        shape_t = 1
+        shape_t = 1
+        shape_t = 1
+        shape_t = 1
+        shape_t = 1
+        shape_t = 1
+        shape_t = 1
+        shape_t = 1
+        shape_t = 1
+        shape_t = 1
+        shape_t = 1
+        shape_t = 1
+        shape_t = 1
+        shape_t = 1
+        shape_t = 1
+        shape_t = 1
+        shape_t = 1
+        shape_t = 1
+        shape_t = 1
+        shape_t = 1
+        shape_t = 1
+        shape_t = 2
+        shape_t = 3
+        shape_t = 4
+        shape_t = 5
+        shape_t = 6
+        shape_t = 7
+        shape_t = 8
+        shape_t = 9
+        nderr = None
+        if nderr or listerr:
+            return f(5)
+        else:
+            return 2
+
+assert test_ndarray_slice_multidim([1], five, False) == 2
+assert test_ndarray_slice_multidim([1], five, True) == 5
--- a/test/simple_source/looping/10_for_if_loopback.py
+++ b/test/simple_source/looping/10_for_if_loopback.py
@@ -37,3 +37,22 @@ assert lasti2lineno([], True) == -1
 assert lasti2lineno([], False) == -1
 assert lasti2lineno([1], False) == -1
 assert lasti2lineno([1], True) == 1
+
+# From 3.7 test_builtin.py
+# Bug was allowing if condition jump back to the
+# "for" loop as an acceptable "ifstmtl" rule.
+
+# RUNNABLE!
+def test_pow(m, b, c):
+    for a in m:
+        if a or \
+           b or \
+           c:
+            c = 1
+
+    return c
+
+assert test_pow([], 2, 3) == 3
+assert test_pow([1], 0, 5) == 1
+assert test_pow([1], 4, 2) == 1
+assert test_pow([0], 0, 0) == 0
--- a/test/stdlib/3.7-exclude.sh
+++ b/test/stdlib/3.7-exclude.sh
@@ -6,8 +6,8 @@ SKIP_TESTS=(
    [test_atexit.py]=1  # The atexit test looks for specific comments in error lines
    [test_baseexception.py]=1  # UnboundLocalError: local variable 'exc' referenced before assignment
    [test_bdb.py]=1  #
-    [test_buffer.py]=1  # parse error
-    [test_builtin.py]=1  # parser error
+    [test_buffer.py]=1  # test assertion errors
+    [test_builtin.py]=1  # test assertion errors
    [test_clinic.py]=1 # it fails on its own
    [test_cmath.py]=1 # test assertion failure
    [test_cmd_line.py]=1  # Interactive?
@@ -25,7 +25,7 @@ SKIP_TESTS=(
    [test_datetime.py]=1   # Takes too long
    [test_dbm_gnu.py]=1   # Takes too long
    [test_dbm_ndbm.py]=1 # it fails on its own
-    [test_decimal.py]=1   # Parse error
+    [test_decimal.py]=1   # test assertion failures
    [test_descr.py]=1   # test assertion failures
    [test_devpoll.py]=1 # it fails on its own
    [test_dis.py]=1   # We change line numbers - duh!
@@ -39,7 +39,7 @@ SKIP_TESTS=(
    [test_format.py]=1 # Probalby not handling bytestrings properly
    [test_frame.py]=1
    [test_ftplib.py]=1
-    [test_functools.py]=1 # parser error
+    [test_functools.py]=1 # parse error
    [test_gdb.py]=1 # it fails on its own
    [test_generators.py]=1  # improper decompile of assert i < n and (n-i) % 3 == 0
    [test_glob.py]=1  # TypeError: join() argument must be str or bytes, not 'tuple'
@@ -50,12 +50,10 @@ SKIP_TESTS=(
    [test_idle.py]=1 # Probably installation specific
    [test_io.py]=1 # test takes too long to run: 37 seconds
    [test_imaplib.py]=1
-    [test_index.py]=1  # parse error
    [test_inspect.py]=1 # test failures
-    [test_itertools.py]=1 # parse error
    [test_kqueue.py]=1 # it fails on its own
    [test_lib2to3.py]=1 # it fails on its own
-    [test_long.py]=1 # investigate
+    [test_long.py]=1 # FIX: if boundaries wrong in Rat __init__
    [test_logging.py]=1 # test takes too long to run: 20 seconds
    [test_mailbox.py]=1
    [test_marshal.py]=1
--- a/uncompyle6/parsers/reducecheck/ifstmt.py
+++ b/uncompyle6/parsers/reducecheck/ifstmt.py
@@ -29,6 +29,9 @@ def ifstmt(self, lhs, n, rule, ast, tokens, first, last):
                if tokens[l] == "JUMP_FORWARD":
                    return tokens[l].attr != pjif_target
                return True
+            elif lhs == "ifstmtl" and tokens[first].off2int() > pjif_target:
+                # A conditional JUMP to the loop is expected for "ifstmtl"
+                return False
            pass
        pass
    pass
--- a/uncompyle6/scanners/scanner37base.py
+++ b/uncompyle6/scanners/scanner37base.py
@@ -30,7 +30,7 @@ Finally we save token information.
 """

 from xdis.code import iscode
-from xdis.bytecode import instruction_size, _get_const_info
+from xdis.bytecode import instruction_size, _get_const_info, Instruction

 from uncompyle6.scanner import Token
 import xdis
@@ -253,6 +253,39 @@ class Scanner37Base(Scanner):
                    pass
                pass

+        # Operand values in Python wordcode are small. As a result,
+        # there are these EXTENDED_ARG instructions - way more than
+        # before 3.6. These make parsing a lot of pain.
+
+        # To simplify things we want to untangle this. We also
+        # do this loop before we compute jump targets.
+        for i, inst in enumerate(self.insts):
+
+            # One artifact of the "too-small" operand problem is that
+            # some backward jumps are turned into forward jumps to another
+            # "extended arg" backward jump to the same location.
+            if inst.opname == "JUMP_FORWARD":
+                jump_inst = self.insts[self.offset2inst_index[inst.argval]]
+                if jump_inst.has_extended_arg:
+                    # Create a combination of the jump-to instruction and
+                    # this one. Keep the position information of this instruction,
+                    # but the operator and operand properties come from the other
+                    # instruction
+                    self.insts[i] = Instruction(
+                        jump_inst.opname,
+                        jump_inst.opcode,
+                        jump_inst.optype,
+                        jump_inst.inst_size,
+                        jump_inst.arg,
+                        jump_inst.argval,
+                        jump_inst.argrepr,
+                        jump_inst.has_arg,
+                        inst.offset,
+                        inst.starts_line,
+                        inst.is_jump_target,
+                        inst.has_extended_arg,
+                    )
+
        # Get jump targets
        # Format: {target offset: [jump offsets]}
        jump_targets = self.find_jump_targets(show_asm)
@@ -302,6 +335,7 @@ class Scanner37Base(Scanner):
                            offset="%s_%s" % (inst.offset, jump_idx),
                            has_arg=True,
                            opc=self.opc,
+                            has_extended_arg=False,
                        ),
                    )
                    jump_idx += 1
@@ -318,6 +352,7 @@ class Scanner37Base(Scanner):
                        offset="%s" % (inst.offset),
                        has_arg=True,
                        opc=self.opc,
+                        has_extended_arg=inst.has_extended_arg,
                    ),
                )

@@ -382,6 +417,7 @@ class Scanner37Base(Scanner):
                        op=op,
                        has_arg=inst.has_arg,
                        opc=self.opc,
+                        has_extended_arg=inst.has_extended_arg,
                    ),
                )
                continue
@@ -475,6 +511,7 @@ class Scanner37Base(Scanner):
                    op=op,
                    has_arg=inst.has_arg,
                    opc=self.opc,
+                    has_extended_arg=inst.has_extended_arg,
                ),
            )
            pass
--- a/uncompyle6/scanners/tok.py
+++ b/uncompyle6/scanners/tok.py
@@ -1,4 +1,4 @@
-#  Copyright (c) 2016-2019 by Rocky Bernstein
+#  Copyright (c) 2016-2020 by Rocky Bernstein
 #  Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
 #  Copyright (c) 1999 John Aycock
 #
@@ -44,11 +44,17 @@ class Token:
        op=None,
        has_arg=None,
        opc=None,
+        has_extended_arg=False
    ):
        self.kind = intern(opname)
        self.has_arg = has_arg
        self.attr = attr
        self.pattr = pattr
+        if has_extended_arg:
+            self.offset = "%d_%d" % (offset, offset+2)
+        else:
+            self.offset = offset
+
        self.offset = offset
        self.linestart = linestart
        if has_arg is False:
@@ -99,7 +105,7 @@ class Token:
            if self.linestart
            else (" " * (6 + len(line_prefix)))
        )
-        offset_opname = "%6s  %-17s" % (self.offset, self.kind)
+        offset_opname = "%8s  %-17s" % (self.offset, self.kind)

        if not self.has_arg:
            return "%s%s" % (prefix, offset_opname)
@@ -131,7 +137,7 @@ class Token:
                    return "%s%s%s %s" % (prefix, offset_opname, argstr, pattr)
                elif self.op in self.opc.hasvargs:
                    return "%s%s%s" % (prefix, offset_opname, argstr)
-                elif name == 'LOAD_ASSERT':
+                elif name == "LOAD_ASSERT":
                    return "%s%s        %s" % (prefix, offset_opname, pattr)
                elif self.op in self.opc.NAME_OPS:
                    if self.opc.version >= 3.0:
@@ -164,6 +170,21 @@ class Token:
            return self.offset
        else:
            assert isinstance(self.offset, str)
+            offsets = list(map(int, self.offset.split("_")))
+            if len(offsets) == 1:
+                return offsets[0]
+            else:
+                assert len(offsets) == 2
+                offset_1, offset_2 = offsets
+            if offset_1 + 2 == offset_2:
+                # This is an instruction with an extended arg.
+                # For things that compare against offsets, we generally want the
+                # later offset.
+                return offset_2 if prefer_last else offset_1
+            else:
+                # Probably a "COME_FROM"-type offset, where the second number
+                # is just a count, and not really an offset.
+                return offset_1
            return(int(self.offset.split("_")[0]))


--- a/uncompyle6/semantics/transform.py
+++ b/uncompyle6/semantics/transform.py
@@ -20,7 +20,7 @@ from spark_parser import GenericASTTraversal, GenericASTTraversalPruningExceptio

 from uncompyle6.semantics.helper import find_code_node
 from uncompyle6.parsers.treenode import SyntaxTree
-from uncompyle6.scanners.tok import Token
+from uncompyle6.scanners.tok import NoneToken, Token
 from uncompyle6.semantics.consts import RETURN_NONE


@@ -104,8 +104,9 @@ class TreeTransform(GenericASTTraversal, object):

        testexpr = node[0]

-        if testexpr.kind != "testexpr":
+        if testexpr != "testexpr":
            return node
+
        if node.kind in ("ifstmt", "ifstmtl"):
            ifstmts_jump = node[1]

@@ -121,14 +122,21 @@ class TreeTransform(GenericASTTraversal, object):
        if stmts in ("c_stmts",) and len(stmts) == 1:
            stmt = stmts[0]
            raise_stmt = stmt[0]
+            testtrue_or_false = testexpr[0]
            if (
                raise_stmt == "raise_stmt1"
-                and len(testexpr[0]) == 2
+                and 1 <= len(testtrue_or_false) <= 2
                and raise_stmt.first_child().pattr == "AssertionError"
            ):
-                assert_expr = testexpr[0][0]
-                assert_expr.kind = "assert_expr"
-                jump_cond = testexpr[0][1]
+                if  testtrue_or_false == "testtrue":
+                    # Skip over the testtrue because it would
+                    # produce a "not" and we don't want that here.
+                    assert_expr = testtrue_or_false[0]
+                    jump_cond = NoneToken
+                else:
+                    assert_expr = testtrue_or_false[0]
+                    jump_cond = testtrue_or_false[1]
+                    assert_expr.kind = "assert_expr"
                expr = raise_stmt[0]
                RAISE_VARARGS_1 = raise_stmt[1]
                call = expr[0]
@@ -146,10 +154,12 @@ class TreeTransform(GenericASTTraversal, object):
                    #                     1. RAISE_VARARGS_1
                    # becomes:
                    # assert2 ::= assert_expr jmp_true LOAD_ASSERT expr RAISE_VARARGS_1 COME_FROM
-                    if jump_cond == "jmp_true":
+                    if jump_cond in ("jmp_true", NoneToken):
                        kind = "assert2"
                    else:
-                        assert jump_cond == "jmp_false"
+                        if jump_cond == "jmp_false":
+                            # FIXME: We don't handle this kind of thing yet.
+                            return node
                        kind = "assert2not"

                    LOAD_ASSERT = call[0].first_child()
@@ -183,7 +193,7 @@ class TreeTransform(GenericASTTraversal, object):
                    #             1.   RAISE_VARARGS_1
                    # becomes:
                    # assert ::= assert_expr jmp_true LOAD_ASSERT RAISE_VARARGS_1 COME_FROM
-                    if jump_cond == "jmp_true":
+                    if jump_cond in ("jmp_true", NoneToken):
                        if self.is_pypy:
                            kind = "assert0_pypy"
                        else: