Merge branch 'master' into python-2.4

2025-08-02 16:44:46 +08:00 · 2019-12-09 22:05:20 -05:00
parent ddaa7ef337 3e3dd87c3b
commit efac5268a4
12 changed files with 103 additions and 174 deletions
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,4 +1,23 @@
-3.5.1 2019-10-29 JNC
+3.6.0: 2019-12-10 gecko gecko
+=============================
+
+The main focus in this release was more accurate decompilation especially
+for 3.7 and 3.8. However there are some improvements to Python 2.x as well,
+including one of the long-standing problems of detecting the difference between
+`try ... ` and `try else ...`.
+
+With this release we now rebase Python 3.7 and later off of a 3.7 base; this
+is also how it is (now) done in decompyle3.  This facilitates removing some of the
+cruft in control-flow detection in the 2.7 uncompyle2 base.
+
+Alas, decompilation time for 3.7 on is greatly increased. Hopefully
+this is temporary (cough, cough) until we can do a static control flow
+pass.
+
+Finally, running in 3.9-dev is tolerated. We can disassemble, but no parse tables yet.
+
+
+3.5.1 2019-11-17 JNC
 ====================

 - Pypy 3.3, 3.5, 3.6, and 3.6.9 support
--- a/README.rst
+++ b/README.rst
@@ -222,7 +222,6 @@ There is lots to do, so please dig in and help.
 See Also
 --------

-* https://github.com/zrax/pycdc : aims to support all versions of Python, but doesn't currently. It is written in C++ and is most accurate for Python versions around 2.7 and 3.3 when the code was more actively developed. Accuracy for more recent versions of Python 3 and early versions of Python are especially lacking. See its `issue tracker <https://github.com/zrax/pycdc/issues>`_ for details. Currently lightly maintained.
 * https://github.com/rocky/python-decompile3 : Much smaller and more modern code, focusing on 3.7+. Changes in that will get migrated back ehre.
 * https://code.google.com/archive/p/unpyc3/ : supports Python 3.2 only. The above projects use a different decompiling technique than what is used here. Currently unmaintained.
 * https://github.com/figment/unpyc3/ : fork of above, but supports Python 3.3 only. Includes some fixes like supporting function annotations. Currently unmaintained.
@@ -232,6 +231,7 @@ See Also
 * https://github.com/rocky/python-xdis : Cross Python version disassembler
 * https://github.com/rocky/python-xasm : Cross Python version assembler
 * https://github.com/rocky/python-uncompyle6/wiki : Wiki Documents which describe the code and aspects of it in more detail
+* https://github.com/zrax/pycdc : The README for this C++ code says it aims to support all versions of Python. It is best for Python versions around 2.7 and 3.3 when the code was initially developed. Accuracy for current versions of Python 3 and early versions of Python is lacking. Without major effort, it is unlikely it can be made to support current Python 3. See its `issue tracker <https://github.com/zrax/pycdc/issues>`_ for details. Currently lightly maintained.


 .. _trepan: https://pypi.python.org/pypi/trepan2g
--- a/test/bytecode_2.7_run/15_mixed_expressions.pyc
+++ b/test/bytecode_2.7_run/15_mixed_expressions.pyc
--- a/test/bytecode_3.0_run/15_mixed_expressions.pyc
+++ b/test/bytecode_3.0_run/15_mixed_expressions.pyc
--- a/test/bytecode_3.1_run/15_mixed_expressions.pyc
+++ b/test/bytecode_3.1_run/15_mixed_expressions.pyc
--- a/test/stdlib/runtests.sh
+++ b/test/stdlib/runtests.sh
@@ -37,7 +37,13 @@ case $PYVERSION in
 	    [test_pep247.py]=1 # Long test - might work? Control flow?
 	    [test_pwd.py]=1 # Long test - might work? Control flow?
 	    [test_pyclbr.py]=1 # Investigate
+<<<<<<< HEAD
 	    [test_re.py]=1 # Investigate produces a Python syntax error
+=======
+	    [test_pyexpat.py]=1 # Investigate
+	    [test_queue.py]=1 # Control flow?
+	    [test_re.py]=1 # try confused with try-else again
+>>>>>>> master
 	    [test_socketserver.py]=1 # -- test takes too long to run: 40 seconds
 	    [test_threading.py]=1 # Line numbers are expected to be different
 	    [test_thread.py]=1 # test takes too long to run: 36 seconds
@@ -55,7 +61,15 @@ case $PYVERSION in
 	    [test_pep352.py]=1     # Investigate
 	    [test_pwd.py]=1 # Long test - might work? Control flow?
 	    [test_pyclbr.py]=1 # Investigate
+<<<<<<< HEAD
 	    [test_struct.py]=1 # "if and" confused for if .. assert and
+=======
+	    [test_queue.py]=1 # Control flow?
+	    [test_re.py]=1 # Possibly try confused with try-else again
+	    [test_struct.py]=1 # "if and" confused for if .. assert and
+	    [test_sys.py]=1 # try confused with try-else again; in test_current_frames()
+	    [test_tarfile.py]=1  # try confused with try-else again; top-level import
+>>>>>>> master
 	    [test_threading.py]=1 # Line numbers are expected to be different
 	    [test_thread.py]=1 # test takes too long to run: 36 seconds
 	    [test_trace.py]=1  # Line numbers are expected to be different
@@ -118,7 +132,6 @@ case $PYVERSION in
 	    [test_doctest.py]=1 # Fails on its own
 	    [test_exceptions.py]=1
 	    [test_format.py]=1  # control flow. uncompyle2 does not have problems here
-	    [test_frozen.py]=1  # try vs try/else control flow. uncompyle2 does not have problems here
 	    [test_generators.py]=1  # control flow. uncompyle2 has problem here too
 	    [test_grammar.py]=1     # Too many stmts. Handle large stmts
 	    [test_grp.py]=1     # test takes to long, works interactively though
@@ -131,8 +144,6 @@ case $PYVERSION in
 	    [test_memoryio.py]=1 # FIX
 	    [test_modulefinder.py]=1 # FIX
 	    [test_multiprocessing.py]=1 # On uncompyle2, takes 24 secs
-	    [test_posix.py]=1   # Bug in try-else detection inside test_initgroups()
-	                        # Deal with when we have better flow-control detection
 	    [test_pwd.py]=1     # Takes too long
 	    [test_pty.py]=1
 	    [test_runpy.py]=1   # Long and fails on its own
--- a/uncompyle6/scanner.py
+++ b/uncompyle6/scanner.py
@@ -62,6 +62,7 @@ PYTHON_VERSIONS = frozenset(
        3.6,
        3.7,
        3.8,
+        3.9,
    )
 )

--- a/uncompyle6/scanners/scanner37base.py
+++ b/uncompyle6/scanners/scanner37base.py
@@ -775,184 +775,27 @@ class Scanner37Base(Scanner):
                    }
                )
        elif op in self.pop_jump_tf:
-            start = offset + inst.inst_size
            target = inst.argval
-            rtarget = self.restrict_to_parent(target, parent)
            prev_op = self.prev_op

-            # Do not let jump to go out of parent struct bounds
-            if target != rtarget and parent["type"] == "and/or":
-                self.fixed_jumps[offset] = rtarget
-                return
-
-            # Does this jump to right after another conditional jump that is
-            # not myself?  If so, it's part of a larger conditional.
-            # rocky: if we have a conditional jump to the next instruction, then
-            # possibly I am "skipping over" a "pass" or null statement.
+            # FIXME: hack upon hack, test_pysource.py fails with this
+            # Until the grammar is corrected we do this fiction...
            pretarget = self.get_inst(prev_op[target])
-
            if (
                pretarget.opcode in self.pop_jump_if_pop
                and (target > offset)
                and pretarget.offset != offset
            ):

-                # FIXME: hack upon hack...
-                # In some cases the pretarget can be a jump to the next instruction
-                # and these aren't and/or's either. We limit to 3.5+ since we experienced there
-                # but it might be earlier versions, or might be a general principle.
                if pretarget.argval != target:
                    # FIXME: this is not accurate The commented out below
                    # is what it should be. However grammar rules right now
                    # assume the incorrect offsets.
                    # self.fixed_jumps[offset] = target
                    self.fixed_jumps[offset] = pretarget.offset
-                    self.structs.append(
-                        {"type": "and/or", "start": start, "end": pretarget.offset}
-                    )
                    return

-            # The opcode *two* instructions before the target jump offset is important
-            # in making a determination of what we have. Save that.
-            pre_rtarget = prev_op[rtarget]
-
-            if op == self.opc.POP_JUMP_IF_FALSE:
-                self.fixed_jumps[offset] = target
-
-            # op == POP_JUMP_IF_TRUE
-            else:
-                next = self.next_stmt[offset]
-                if prev_op[next] == offset:
-                    pass
-                elif self.is_jump_forward(next) and target == self.get_target(next):
-                    if code[prev_op[next]] == self.opc.POP_JUMP_IF_FALSE:
-                        if (
-                            code[next] == self.opc.JUMP_FORWARD
-                            or target != rtarget
-                            or code[prev_op[pre_rtarget]]
-                            not in (self.opc.JUMP_ABSOLUTE, self.opc.RETURN_VALUE)
-                        ):
-                            self.fixed_jumps[offset] = prev_op[next]
-                            return
-                elif (
-                    code[next] == self.opc.JUMP_ABSOLUTE
-                    and self.is_jump_forward(target)
-                    and self.get_target(target) == self.get_target(next)
-                ):
-                    self.fixed_jumps[offset] = prev_op[next]
-                    return
-
-            rtarget_is_ja = code[pre_rtarget] == self.opc.JUMP_ABSOLUTE
-            if (
-                rtarget_is_ja
-                and pre_rtarget in self.stmts
-                and pre_rtarget != offset
-                and prev_op[pre_rtarget] != offset
-                and not (
-                    code[rtarget] == self.opc.JUMP_ABSOLUTE
-                    and code[rtarget + 3] == self.opc.POP_BLOCK
-                    and code[prev_op[pre_rtarget]] != self.opc.JUMP_ABSOLUTE
-                )
-            ):
-                rtarget = pre_rtarget
-
-            # Does the "jump if" jump beyond a jump op?
-            # That is, we have something like:
-            #  POP_JUMP_IF_FALSE HERE
-            #  ...
-            # JUMP_FORWARD
-            # HERE:
-            #
-            # If so, this can be block inside an "if" statement
-            # or a conditional assignment like:
-            #   x = 1 if x else 2
-            #
-            # For 3.5, for JUMP_FORWARD above we could have also
-            # JUMP_BACK or CONTINUE
-            #
-            # There are other situations we may need to consider, like
-            # if the condition jump is to a forward location.
-            # Also the existence of a jump to the instruction after "END_FINALLY"
-            # will distinguish "try/else" from "try".
-            rtarget_break = (self.opc.RETURN_VALUE, self.opc.BREAK_LOOP)
-
-            if self.is_jump_forward(pre_rtarget) or (rtarget_is_ja):
-                if_end = self.get_target(pre_rtarget)
-
-                # If the jump target is back, we are looping
-                if (
-                    if_end < pre_rtarget
-                    and self.version < 3.8
-                    and (code[prev_op[if_end]] == self.opc.SETUP_LOOP)
-                ):
-                    if if_end > start:
-                        return
-
-                end = self.restrict_to_parent(if_end, parent)
-
-                self.structs.append(
-                    {"type": "if-then", "start": start, "end": pre_rtarget}
-                )
-
-                # FIXME: add this
-                # self.fixed_jumps[offset] = rtarget
-                self.not_continue.add(pre_rtarget)
-
-                if rtarget < end and (
-                    code[rtarget] not in (self.opc.END_FINALLY, self.opc.JUMP_ABSOLUTE)
-                    and code[prev_op[pre_rtarget]]
-                    not in (self.opc.POP_EXCEPT, self.opc.END_FINALLY)
-                ):
-                    self.structs.append({"type": "else", "start": rtarget, "end": end})
-                    self.else_start[rtarget] = end
-            elif self.is_jump_back(pre_rtarget, 0):
-                if_end = rtarget
-                self.structs.append(
-                    {"type": "if-then", "start": start, "end": pre_rtarget}
-                )
-                self.not_continue.add(pre_rtarget)
-            elif code[pre_rtarget] in rtarget_break:
-                self.structs.append({"type": "if-then", "start": start, "end": rtarget})
-                # It is important to distingish if this return is inside some sort
-                # except block return
-                jump_prev = prev_op[offset]
-                if self.is_pypy and code[jump_prev] == self.opc.COMPARE_OP:
-                    if self.opc.cmp_op[code[jump_prev + 1]] == "exception-match":
-                        return
-                    pass
-
-                # Check that next instruction after pops and jump is
-                # not from SETUP_EXCEPT
-                next_op = rtarget
-                if code[next_op] == self.opc.POP_BLOCK:
-                    next_op += instruction_size(self.code[next_op], self.opc)
-                if code[next_op] == self.opc.JUMP_ABSOLUTE:
-                    next_op += instruction_size(self.code[next_op], self.opc)
-                if next_op in targets:
-                    for try_op in targets[next_op]:
-                        come_from_op = code[try_op]
-                        if self.version < 3.8 and come_from_op == self.opc.SETUP_EXCEPT:
-                            return
-                        pass
-
-                self.fixed_jumps[offset] = rtarget
-
-                if code[pre_rtarget] == self.opc.RETURN_VALUE:
-                    # If we are at some sort of POP_JUMP_IF and the instruction before was
-                    # COMPARE_OP exception-match, then pre_rtarget is not an end_if
-                    if not (
-                        inst_index > 0
-                        and self.insts[inst_index - 1].argval == "exception-match"
-                    ):
-                        self.return_end_ifs.add(pre_rtarget)
-                else:
-                    self.fixed_jumps[offset] = rtarget
-                    self.not_continue.add(pre_rtarget)
-            else:
-
-                if target > offset:
-                    self.fixed_jumps[offset] = target
-                    pass
+            self.fixed_jumps[offset] = target

        elif self.version < 3.8 and op == self.opc.SETUP_EXCEPT:
            target = self.get_target(offset)
--- a/uncompyle6/scanners/scanner38.py
+++ b/uncompyle6/scanners/scanner38.py
@@ -12,14 +12,13 @@
 #
 #  You should have received a copy of the GNU General Public License
 #  along with this program.  If not, see <http://www.gnu.org/licenses/>.
-"""
-Python 3.8 bytecode decompiler scanner
+"""Python 3.8 bytecode decompiler scanner

-Does some additional massaging of xdis-disassembled instructions to
-make things easier for decompilation.
+Does some token massaging of xdis-disassembled instructions to make
+things easier for decompilation.

 This sets up opcodes Python's 3.8 and calls a generalized
-scanner routine for Python 3.
+scanner routine for Python 3.7 and up.
 """

 from uncompyle6.scanners.scanner37 import Scanner37
--- a/uncompyle6/scanners/scanner39.py
+++ b/uncompyle6/scanners/scanner39.py
@@ -0,0 +1,55 @@
+#  Copyright (c) 2019 by Rocky Bernstein
+#
+#  This program is free software: you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation, either version 3 of the License, or
+#  (at your option) any later version.
+#
+#  This program is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#  GNU General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+"""Python 3.9 bytecode decompiler scanner.
+
+Does some token massaging of xdis-disassembled instructions to make
+things easier for decompilation.
+
+This sets up Python 3.9's opcodes and calls a generalized
+scanner routine for Python 3.7 and up.
+"""
+
+from uncompyle6.scanners.scanner38 import Scanner38
+from uncompyle6.scanners.scanner37base import Scanner37Base
+
+# bytecode verification, verify(), uses JUMP_OPs from here
+from xdis.opcodes import opcode_38 as opc
+
+# bytecode verification, verify(), uses JUMP_OPS from here
+JUMP_OPs = opc.JUMP_OPS
+
+
+class Scanner39(Scanner38):
+    def __init__(self, show_asm=None):
+        Scanner37Base.__init__(self, 3.9, show_asm)
+        return
+
+    pass
+
+
+if __name__ == "__main__":
+    from uncompyle6 import PYTHON_VERSION
+
+    if PYTHON_VERSION == 3.9:
+        import inspect
+
+        co = inspect.currentframe().f_code
+        tokens, customize = Scanner39().ingest(co)
+        for t in tokens:
+            print(t.format())
+        pass
+    else:
+        print("Need to be Python 3.9 to demo; I am %s." %
+              PYTHON_VERSION)
--- a/uncompyle6/semantics/customize38.py
+++ b/uncompyle6/semantics/customize38.py
@@ -89,9 +89,10 @@ def customize_for_version38(self, version):
                            (1, "_ifstmts_jumpl") ),

        'whilestmt38': ( '%|while %c:\n%+%c%-\n\n',
-                         (1, 'testexpr'), (2, 'l_stmts') ),
+                         (1, 'testexpr'),
+                         2 ), # "l_stmts" or "pass"
        'whileTruestmt38': ( '%|while True:\n%+%c%-\n\n',
-                         (1, 'l_stmts') ),
+                             1 ), # "l_stmts" or "pass"
        'try_elsestmtl38': (
            '%|try:\n%+%c%-%c%|else:\n%+%c%-',
            (1, 'suite_stmts_opt'),
@@ -106,7 +107,7 @@ def customize_for_version38(self, version):
        'tryfinally38': (
            '%|try:\n%+%c%-%|finally:\n%+%c%-\n\n',
                   (3, 'returns'), 6 ),
-        "named_expr": ( # AKA "walrus operatotr"
+        "named_expr": ( # AKA "walrus operator"
            "%c := %c", (2, "store"), (0, "expr")
            )
    })
--- a/uncompyle6/version.py
+++ b/uncompyle6/version.py
@@ -12,4 +12,4 @@
 #  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 # This file is suitable for sourcing inside bash as
 # well as importing into Python
-VERSION="3.5.1"  # noqa
+VERSION="3.6.0"  # noqa