Merge branch 'master' into python-2.4

This commit is contained in:
rocky
2019-12-09 22:05:20 -05:00
12 changed files with 103 additions and 174 deletions

21
NEWS.md
View File

@@ -1,4 +1,23 @@
3.5.1 2019-10-29 JNC
3.6.0: 2019-12-10 gecko gecko
=============================
The main focus in this release was more accurate decompilation especially
for 3.7 and 3.8. However there are some improvements to Python 2.x as well,
including one of the long-standing problems of detecting the difference between
`try ... ` and `try else ...`.
With this release we now base Python 3.7 decompilation off of a 3.7 base; this
is also how it is (now) done in decompyle3. This facilitates removing some of the
cruft in control-flow detection in the 2.7 uncompyle2 base.
Alas, decompilation time for 3.7 on is greatly increased. Hopefully
this is temporary (cough, cough) until we can do a static control flow
pass.
Finally, running in 3.9-dev is tolerated. We can disassemble, but no parse tables yet.
3.5.1 2019-11-17 JNC
====================
- Pypy 3.3, 3.5, 3.6, and 3.6.9 support

View File

@@ -222,7 +222,6 @@ There is lots to do, so please dig in and help.
See Also
--------
* https://github.com/zrax/pycdc : aims to support all versions of Python, but doesn't currently. It is written in C++ and is most accurate for Python versions around 2.7 and 3.3 when the code was more actively developed. Accuracy for more recent versions of Python 3 and early versions of Python are especially lacking. See its `issue tracker <https://github.com/zrax/pycdc/issues>`_ for details. Currently lightly maintained.
* https://github.com/rocky/python-decompile3 : Much smaller and more modern code, focusing on 3.7+. Changes in that will get migrated back here.
* https://code.google.com/archive/p/unpyc3/ : supports Python 3.2 only. The above projects use a different decompiling technique than what is used here. Currently unmaintained.
* https://github.com/figment/unpyc3/ : fork of above, but supports Python 3.3 only. Includes some fixes like supporting function annotations. Currently unmaintained.
@@ -232,6 +231,7 @@ See Also
* https://github.com/rocky/python-xdis : Cross Python version disassembler
* https://github.com/rocky/python-xasm : Cross Python version assembler
* https://github.com/rocky/python-uncompyle6/wiki : Wiki Documents which describe the code and aspects of it in more detail
* https://github.com/zrax/pycdc : The README for this C++ code says it aims to support all versions of Python. It is best for Python versions around 2.7 and 3.3 when the code was initially developed. Accuracy for current versions of Python3 and early versions of Python is lacking. Without major effort, it is unlikely it can be made to support current Python 3. See its `issue tracker <https://github.com/zrax/pycdc/issues>`_ for details. Currently lightly maintained.
.. _trepan: https://pypi.python.org/pypi/trepan2g

View File

@@ -37,7 +37,13 @@ case $PYVERSION in
[test_pep247.py]=1 # Long test - might work? Control flow?
[test_pwd.py]=1 # Long test - might work? Control flow?
[test_pyclbr.py]=1 # Investigate
<<<<<<< HEAD
[test_re.py]=1 # Investigate produces a Python syntax error
=======
[test_pyexpat.py]=1 # Investigate
[test_queue.py]=1 # Control flow?
[test_re.py]=1 # try confused with try-else again
>>>>>>> master
[test_socketserver.py]=1 # -- test takes too long to run: 40 seconds
[test_threading.py]=1 # Line numbers are expected to be different
[test_thread.py]=1 # test takes too long to run: 36 seconds
@@ -55,7 +61,15 @@ case $PYVERSION in
[test_pep352.py]=1 # Investigate
[test_pwd.py]=1 # Long test - might work? Control flow?
[test_pyclbr.py]=1 # Investigate
<<<<<<< HEAD
[test_struct.py]=1 # "if and" confused for if .. assert and
=======
[test_queue.py]=1 # Control flow?
[test_re.py]=1 # Possibly try confused with try-else again
[test_struct.py]=1 # "if and" confused for if .. assert and
[test_sys.py]=1 # try confused with try-else again; in test_current_frames()
[test_tarfile.py]=1 # try confused with try-else again; top-level import
>>>>>>> master
[test_threading.py]=1 # Line numbers are expected to be different
[test_thread.py]=1 # test takes too long to run: 36 seconds
[test_trace.py]=1 # Line numbers are expected to be different
@@ -118,7 +132,6 @@ case $PYVERSION in
[test_doctest.py]=1 # Fails on its own
[test_exceptions.py]=1
[test_format.py]=1 # control flow. uncompyle2 does not have problems here
[test_frozen.py]=1 # try vs try/else control flow. uncompyle2 does not have problems here
[test_generators.py]=1 # control flow. uncompyle2 has problem here too
[test_grammar.py]=1 # Too many stmts. Handle large stmts
[test_grp.py]=1 # test takes to long, works interactively though
@@ -131,8 +144,6 @@ case $PYVERSION in
[test_memoryio.py]=1 # FIX
[test_modulefinder.py]=1 # FIX
[test_multiprocessing.py]=1 # On uncompyle2, takes 24 secs
[test_posix.py]=1 # Bug in try-else detection inside test_initgroups()
# Deal with when we have better flow-control detection
[test_pwd.py]=1 # Takes too long
[test_pty.py]=1
[test_runpy.py]=1 # Long and fails on its own

View File

@@ -62,6 +62,7 @@ PYTHON_VERSIONS = frozenset(
3.6,
3.7,
3.8,
3.9,
)
)

View File

@@ -775,184 +775,27 @@ class Scanner37Base(Scanner):
}
)
elif op in self.pop_jump_tf:
start = offset + inst.inst_size
target = inst.argval
rtarget = self.restrict_to_parent(target, parent)
prev_op = self.prev_op
# Do not let jump to go out of parent struct bounds
if target != rtarget and parent["type"] == "and/or":
self.fixed_jumps[offset] = rtarget
return
# Does this jump to right after another conditional jump that is
# not myself? If so, it's part of a larger conditional.
# rocky: if we have a conditional jump to the next instruction, then
# possibly I am "skipping over" a "pass" or null statement.
# FIXME: hack upon hack, test_pysource.py fails with this
# Until the grammar is corrected we do this fiction...
pretarget = self.get_inst(prev_op[target])
if (
pretarget.opcode in self.pop_jump_if_pop
and (target > offset)
and pretarget.offset != offset
):
# FIXME: hack upon hack...
# In some cases the pretarget can be a jump to the next instruction
# and these aren't and/or's either. We limit to 3.5+ since we experienced there
# but it might be earlier versions, or might be a general principle.
if pretarget.argval != target:
# FIXME: this is not accurate The commented out below
# is what it should be. However grammar rules right now
# assume the incorrect offsets.
# self.fixed_jumps[offset] = target
self.fixed_jumps[offset] = pretarget.offset
self.structs.append(
{"type": "and/or", "start": start, "end": pretarget.offset}
)
return
# The opcode *two* instructions before the target jump offset is important
# in making a determination of what we have. Save that.
pre_rtarget = prev_op[rtarget]
if op == self.opc.POP_JUMP_IF_FALSE:
self.fixed_jumps[offset] = target
# op == POP_JUMP_IF_TRUE
else:
next = self.next_stmt[offset]
if prev_op[next] == offset:
pass
elif self.is_jump_forward(next) and target == self.get_target(next):
if code[prev_op[next]] == self.opc.POP_JUMP_IF_FALSE:
if (
code[next] == self.opc.JUMP_FORWARD
or target != rtarget
or code[prev_op[pre_rtarget]]
not in (self.opc.JUMP_ABSOLUTE, self.opc.RETURN_VALUE)
):
self.fixed_jumps[offset] = prev_op[next]
return
elif (
code[next] == self.opc.JUMP_ABSOLUTE
and self.is_jump_forward(target)
and self.get_target(target) == self.get_target(next)
):
self.fixed_jumps[offset] = prev_op[next]
return
rtarget_is_ja = code[pre_rtarget] == self.opc.JUMP_ABSOLUTE
if (
rtarget_is_ja
and pre_rtarget in self.stmts
and pre_rtarget != offset
and prev_op[pre_rtarget] != offset
and not (
code[rtarget] == self.opc.JUMP_ABSOLUTE
and code[rtarget + 3] == self.opc.POP_BLOCK
and code[prev_op[pre_rtarget]] != self.opc.JUMP_ABSOLUTE
)
):
rtarget = pre_rtarget
# Does the "jump if" jump beyond a jump op?
# That is, we have something like:
# POP_JUMP_IF_FALSE HERE
# ...
# JUMP_FORWARD
# HERE:
#
# If so, this can be block inside an "if" statement
# or a conditional assignment like:
# x = 1 if x else 2
#
# For 3.5, for JUMP_FORWARD above we could have also
# JUMP_BACK or CONTINUE
#
# There are other situations we may need to consider, like
# if the condition jump is to a forward location.
# Also the existence of a jump to the instruction after "END_FINALLY"
# will distinguish "try/else" from "try".
rtarget_break = (self.opc.RETURN_VALUE, self.opc.BREAK_LOOP)
if self.is_jump_forward(pre_rtarget) or (rtarget_is_ja):
if_end = self.get_target(pre_rtarget)
# If the jump target is back, we are looping
if (
if_end < pre_rtarget
and self.version < 3.8
and (code[prev_op[if_end]] == self.opc.SETUP_LOOP)
):
if if_end > start:
return
end = self.restrict_to_parent(if_end, parent)
self.structs.append(
{"type": "if-then", "start": start, "end": pre_rtarget}
)
# FIXME: add this
# self.fixed_jumps[offset] = rtarget
self.not_continue.add(pre_rtarget)
if rtarget < end and (
code[rtarget] not in (self.opc.END_FINALLY, self.opc.JUMP_ABSOLUTE)
and code[prev_op[pre_rtarget]]
not in (self.opc.POP_EXCEPT, self.opc.END_FINALLY)
):
self.structs.append({"type": "else", "start": rtarget, "end": end})
self.else_start[rtarget] = end
elif self.is_jump_back(pre_rtarget, 0):
if_end = rtarget
self.structs.append(
{"type": "if-then", "start": start, "end": pre_rtarget}
)
self.not_continue.add(pre_rtarget)
elif code[pre_rtarget] in rtarget_break:
self.structs.append({"type": "if-then", "start": start, "end": rtarget})
# It is important to distingish if this return is inside some sort
# except block return
jump_prev = prev_op[offset]
if self.is_pypy and code[jump_prev] == self.opc.COMPARE_OP:
if self.opc.cmp_op[code[jump_prev + 1]] == "exception-match":
return
pass
# Check that next instruction after pops and jump is
# not from SETUP_EXCEPT
next_op = rtarget
if code[next_op] == self.opc.POP_BLOCK:
next_op += instruction_size(self.code[next_op], self.opc)
if code[next_op] == self.opc.JUMP_ABSOLUTE:
next_op += instruction_size(self.code[next_op], self.opc)
if next_op in targets:
for try_op in targets[next_op]:
come_from_op = code[try_op]
if self.version < 3.8 and come_from_op == self.opc.SETUP_EXCEPT:
return
pass
self.fixed_jumps[offset] = rtarget
if code[pre_rtarget] == self.opc.RETURN_VALUE:
# If we are at some sort of POP_JUMP_IF and the instruction before was
# COMPARE_OP exception-match, then pre_rtarget is not an end_if
if not (
inst_index > 0
and self.insts[inst_index - 1].argval == "exception-match"
):
self.return_end_ifs.add(pre_rtarget)
else:
self.fixed_jumps[offset] = rtarget
self.not_continue.add(pre_rtarget)
else:
if target > offset:
self.fixed_jumps[offset] = target
pass
self.fixed_jumps[offset] = target
elif self.version < 3.8 and op == self.opc.SETUP_EXCEPT:
target = self.get_target(offset)

View File

@@ -12,14 +12,13 @@
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
Python 3.8 bytecode decompiler scanner
"""Python 3.8 bytecode decompiler scanner
Does some additional massaging of xdis-disassembled instructions to
make things easier for decompilation.
Does some token massaging of xdis-disassembled instructions to make
things easier for decompilation.
This sets up opcodes Python's 3.8 and calls a generalized
scanner routine for Python 3.
scanner routine for Python 3.7 and up.
"""
from uncompyle6.scanners.scanner37 import Scanner37

View File

@@ -0,0 +1,55 @@
# Copyright (c) 2019 by Rocky Bernstein
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""Python 3.9 bytecode decompiler scanner.
Does some token massaging of xdis-disassembled instructions to make
things easier for decompilation.
This sets up the opcodes for Python 3.9 and calls a generalized
scanner routine for Python 3.7 and up.
"""
from uncompyle6.scanners.scanner38 import Scanner38
from uncompyle6.scanners.scanner37base import Scanner37Base
# NOTE(review): the 3.8 opcode table (opcode_38) is what is imported here for
# the 3.9 scanner -- confirm this is intentional until 3.9 opcodes exist.
from xdis.opcodes import opcode_38 as opc

# bytecode verification, verify(), uses JUMP_OPs from here
JUMP_OPs = opc.JUMP_OPS
class Scanner39(Scanner38):
    """Scanner for Python 3.9 bytecode.

    Derives from Scanner38, but its initializer invokes
    Scanner37Base.__init__ directly, passing 3.9 as the version.
    """

    def __init__(self, show_asm=None):
        # Initialize through Scanner37Base explicitly, handing it the 3.9
        # version number together with the caller's show_asm setting.
        Scanner37Base.__init__(self, 3.9, show_asm)
if __name__ == "__main__":
    # Small demo: scan this module's own code object and print each token.
    # The scan runs only when PYTHON_VERSION equals 3.9; otherwise a notice
    # is printed instead.
    from uncompyle6 import PYTHON_VERSION

    if PYTHON_VERSION != 3.9:
        print("Need to be Python 3.9 to demo; I am %s." % PYTHON_VERSION)
    else:
        import inspect

        code_obj = inspect.currentframe().f_code
        tokens, customize = Scanner39().ingest(code_obj)
        for token in tokens:
            print(token.format())

View File

@@ -89,9 +89,10 @@ def customize_for_version38(self, version):
(1, "_ifstmts_jumpl") ),
'whilestmt38': ( '%|while %c:\n%+%c%-\n\n',
(1, 'testexpr'), (2, 'l_stmts') ),
(1, 'testexpr'),
2 ), # "l_stmts" or "pass"
'whileTruestmt38': ( '%|while True:\n%+%c%-\n\n',
(1, 'l_stmts') ),
1 ), # "l_stmts" or "pass"
'try_elsestmtl38': (
'%|try:\n%+%c%-%c%|else:\n%+%c%-',
(1, 'suite_stmts_opt'),
@@ -106,7 +107,7 @@ def customize_for_version38(self, version):
'tryfinally38': (
'%|try:\n%+%c%-%|finally:\n%+%c%-\n\n',
(3, 'returns'), 6 ),
"named_expr": ( # AKA "walrus operatotr"
"named_expr": ( # AKA "walrus operator"
"%c := %c", (2, "store"), (0, "expr")
)
})

View File

@@ -12,4 +12,4 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# This file is suitable for sourcing inside bash as
# well as importing into Python
VERSION="3.5.1" # noqa
VERSION="3.6.0" # noqa