You've already forked python-uncompyle6
mirror of
https://github.com/rocky/python-uncompyle6.git
synced 2025-08-03 00:45:53 +08:00
Merge branch 'master' into python-2.4
This commit is contained in:
35
README.rst
35
README.rst
@@ -93,8 +93,8 @@ This uses setup.py, so it follows the standard Python routine:
|
||||
A GNU makefile is also provided so :code:`make install` (possibly as root or
|
||||
sudo) will do the steps above.
|
||||
|
||||
Testing
|
||||
-------
|
||||
Running Tests
|
||||
-------------
|
||||
|
||||
::
|
||||
|
||||
@@ -133,18 +133,8 @@ You can also cross compare the results with pycdc_ . Since they work
|
||||
differently, bugs here often aren't in that, and vice versa.
|
||||
|
||||
|
||||
Known Bugs/Restrictions
|
||||
-----------------------
|
||||
|
||||
The biggest known and possibly fixable (but hard) problem has to do
|
||||
with handling control flow. (Python has probably the most diverse and
|
||||
screwy set of compound statements I've ever seen; there
|
||||
are "else" clauses on loops and try blocks that I suspect many
|
||||
programmers don't know about.)
|
||||
|
||||
All of the Python decompilers that I have looked at have problems
|
||||
decompiling Python's control flow. In some cases we can detect an
|
||||
erroneous decompilation and report that.
|
||||
Verification
|
||||
------------
|
||||
|
||||
In older versions of Python it was possible to verify bytecode by
|
||||
decompiling bytecode, and then compiling using the Python interpreter
|
||||
@@ -167,6 +157,19 @@ And already Python has a set of programs like this: the test suite
|
||||
for the standard library that comes with Python. We have some
|
||||
code in `test/stdlib` to facilitate this kind of checking.
|
||||
|
||||
Known Bugs/Restrictions
|
||||
-----------------------
|
||||
|
||||
The biggest known and possibly fixable (but hard) problem has to do
|
||||
with handling control flow. (Python has probably the most diverse and
|
||||
screwy set of compound statements I've ever seen; there
|
||||
are "else" clauses on loops and try blocks that I suspect many
|
||||
programmers don't know about.)
|
||||
|
||||
All of the Python decompilers that I have looked at have problems
|
||||
decompiling Python's control flow. In some cases we can detect an
|
||||
erroneous decompilation and report that.
|
||||
|
||||
Python support is strongest in Python 2 for 2.7 and drops off as you
|
||||
get further away from that. Support is also probably pretty good for
|
||||
python 2.3-2.4 since a lot of the goodness of early the version of the
|
||||
@@ -194,7 +197,7 @@ Between Python 3.5, 3.6 and 3.7 there have been major changes to the
|
||||
|
||||
Currently not all Python magic numbers are supported. Specifically in
|
||||
some versions of Python, notably Python 3.6, the magic number has
|
||||
changes several times within a version.
|
||||
changes several times within a version.
|
||||
|
||||
**We support only released versions, not candidate versions.** Note however
|
||||
that the magic of a released version is usually the same as the *last* candidate version prior to release.
|
||||
@@ -222,7 +225,7 @@ See Also
|
||||
* https://github.com/zrax/pycdc : purports to support all versions of Python. It is written in C++ and is most accurate for Python versions around 2.7 and 3.3 when the code was more actively developed. Accuracy for more recent versions of Python 3 and early versions of Python are especially lacking. See its `issue tracker <https://github.com/zrax/pycdc/issues>`_ for details. Currently lightly maintained.
|
||||
* https://code.google.com/archive/p/unpyc3/ : supports Python 3.2 only. The above projects use a different decompiling technique than what is used here. Currently unmaintained.
|
||||
* https://github.com/figment/unpyc3/ : fork of above, but supports Python 3.3 only. Includes some fixes like supporting function annotations. Currently unmaintained.
|
||||
* https://github.com/wibiti/uncompyle2 : supports Python 2.7 only, but does that fairly well. There are situtations where `uncompyle6` results are incorrect while `uncompyle2` results are not, but more often uncompyle6 is correct when uncompyle2 is not. Because `uncompyle6` adheres to accuracy over idiomatic Python, `uncompyle2` can produce more natural-looking code when it is correct. Currently `uncompyle2` is lightly maintained. See its issue `tracker <https://github.com/wibiti/uncompyle2/issues>`_ for more details
|
||||
* https://github.com/wibiti/uncompyle2 : supports Python 2.7 only, but does that fairly well. There are situations where `uncompyle6` results are incorrect while `uncompyle2` results are not, but more often uncompyle6 is correct when uncompyle2 is not. Because `uncompyle6` adheres to accuracy over idiomatic Python, `uncompyle2` can produce more natural-looking code when it is correct. Currently `uncompyle2` is lightly maintained. See its issue `tracker <https://github.com/wibiti/uncompyle2/issues>`_ for more details
|
||||
* `How to report a bug <https://github.com/rocky/python-uncompyle6/blob/master/HOW-TO-REPORT-A-BUG.md>`_
|
||||
* The HISTORY_ file.
|
||||
* https://github.com/rocky/python-xdis : Cross Python version disassembler
|
||||
|
@@ -1,78 +0,0 @@
|
||||
import sys
|
||||
from uncompyle6 import PYTHON3
|
||||
if PYTHON3:
|
||||
from io import StringIO
|
||||
minint = -sys.maxsize-1
|
||||
maxint = sys.maxsize
|
||||
else:
|
||||
from StringIO import StringIO
|
||||
minint = -sys.maxint-1
|
||||
maxint = sys.maxint
|
||||
from uncompyle6.semantics.helper import print_docstring
|
||||
|
||||
class PrintFake:
|
||||
def __init__(self):
|
||||
self.pending_newlines = 0
|
||||
self.f = StringIO()
|
||||
|
||||
def write(self, *data):
|
||||
if (len(data) == 0) or (len(data) == 1 and data[0] == ''):
|
||||
return
|
||||
out = ''.join((str(j) for j in data))
|
||||
n = 0
|
||||
for i in out:
|
||||
if i == '\n':
|
||||
n += 1
|
||||
if n == len(out):
|
||||
self.pending_newlines = max(self.pending_newlines, n)
|
||||
return
|
||||
elif n:
|
||||
self.pending_newlines = max(self.pending_newlines, n)
|
||||
out = out[n:]
|
||||
break
|
||||
else:
|
||||
break
|
||||
|
||||
if self.pending_newlines > 0:
|
||||
self.f.write('\n'*self.pending_newlines)
|
||||
self.pending_newlines = 0
|
||||
|
||||
for i in out[::-1]:
|
||||
if i == '\n':
|
||||
self.pending_newlines += 1
|
||||
else:
|
||||
break
|
||||
|
||||
if self.pending_newlines:
|
||||
out = out[:-self.pending_newlines]
|
||||
self.f.write(out)
|
||||
def println(self, *data):
|
||||
if data and not(len(data) == 1 and data[0] == ''):
|
||||
self.write(*data)
|
||||
self.pending_newlines = max(self.pending_newlines, 1)
|
||||
return
|
||||
pass
|
||||
|
||||
def test_docstring():
|
||||
|
||||
for doc, expect in (
|
||||
("Now is the time",
|
||||
' """Now is the time"""'),
|
||||
("""
|
||||
Now is the time
|
||||
""",
|
||||
''' """
|
||||
Now is the time
|
||||
"""''')
|
||||
|
||||
# (r'''func placeholder - ' and with ("""\nstring\n """)''',
|
||||
# """ r'''func placeholder - ' and with (\"\"\"\nstring\n\"\"\")'''"""),
|
||||
# (r"""func placeholder - ' and with ('''\nstring\n''') and \"\"\"\nstring\n\"\"\" """,
|
||||
# """ r\"\"\"func placeholder - ' and with ('''\nstring\n''') and \"\"\"\nstring\n\"\"\" \"\"\"""")
|
||||
):
|
||||
|
||||
o = PrintFake()
|
||||
# print(doc)
|
||||
# print(expect)
|
||||
print_docstring(o, ' ', doc)
|
||||
assert expect == o.f.getvalue()
|
@@ -177,7 +177,7 @@ grammar-coverage-2.6:
|
||||
grammar-coverage-2.7:
|
||||
-rm $(COVER_DIR)/spark-grammar-2.7.cover || true
|
||||
SPARK_PARSER_COVERAGE=$(COVER_DIR)/spark-grammar-2.7.cover $(PYTHON) test_pythonlib.py --bytecode-2.7
|
||||
SPARK_PARSER_COVERAGE=$(COVER_DIR)/spark-grammar-2.7.cover $(PYTHON) test_pyenvlib.py --2.7.14 --max=600
|
||||
SPARK_PARSER_COVERAGE=$(COVER_DIR)/spark-grammar-2.7.cover $(PYTHON) test_pyenvlib.py --2.7.16 --max=600
|
||||
|
||||
#: Get grammar coverage for Python 3.0
|
||||
grammar-coverage-3.0:
|
||||
@@ -220,7 +220,12 @@ grammar-coverage-3.5:
|
||||
grammar-coverage-3.6:
|
||||
rm $(COVER_DIR)/spark-grammar-3.6.cover || /bin/true
|
||||
SPARK_PARSER_COVERAGE=$(COVER_DIR)/spark-grammar-3.6.cover $(PYTHON) test_pythonlib.py --bytecode-3.6
|
||||
SPARK_PARSER_COVERAGE=$(COVER_DIR)/spark-grammar-3.6.cover $(PYTHON) test_pyenvlib.py --3.6.4 --max=280
|
||||
SPARK_PARSER_COVERAGE=$(COVER_DIR)/spark-grammar-3.6.cover $(PYTHON) test_pyenvlib.py --3.6.8 --max=280
|
||||
|
||||
#: Get grammar coverage for Python 3.7
|
||||
grammar-coverage-3.7:
|
||||
rm $(COVER_DIR)/spark-grammar-3.7.cover || /bin/true
|
||||
SPARK_PARSER_COVERAGE=$(COVER_DIR)/spark-grammar-3.7.cover $(PYTHON) test_pyenvlib.py --3.7.3 --max=500
|
||||
|
||||
#: Check deparsing Python 2.6
|
||||
check-bytecode-2.6:
|
||||
|
Binary file not shown.
BIN
test/bytecode_2.7_run/00_docstring.pyc
Normal file
BIN
test/bytecode_2.7_run/00_docstring.pyc
Normal file
Binary file not shown.
Binary file not shown.
BIN
test/bytecode_3.7_run/00_docstring.pyc
Normal file
BIN
test/bytecode_3.7_run/00_docstring.pyc
Normal file
Binary file not shown.
@@ -1,7 +1,7 @@
|
||||
#!/bin/bash
|
||||
# Remake Python grammar statistics
|
||||
|
||||
typeset -A ALL_VERS=([2.4]=2.4.6 [2.5]=2.5.6 [2.6]=2.6.9 [2.7]=2.7.14 [3.2]=3.2.6 [3.3]=3.3.6 [3.4]=3.4.8 [3.5]=3.5.5 [3.6]=3.6.4)
|
||||
typeset -A ALL_VERS=([2.4]=2.4.6 [2.5]=2.5.6 [2.6]=2.6.9 [2.7]=2.7.16 [3.2]=3.2.6 [3.3]=3.3.6 [3.4]=3.4.8 [3.5]=3.5.6 [3.6]=3.6.8, [3.7]=3.7.3)
|
||||
|
||||
if (( $# == 0 )); then
|
||||
echo 1>&2 "usage: $0 two-digit-version"
|
||||
|
@@ -1,4 +1,6 @@
|
||||
# uncompyle2 bug was not escaping """ properly
|
||||
|
||||
# RUNNABLE!
|
||||
r'''func placeholder - with ("""\nstring\n""")'''
|
||||
def foo():
|
||||
r'''func placeholder - ' and with ("""\nstring\n""")'''
|
||||
@@ -20,3 +22,22 @@ def baz():
|
||||
>>> t.rundict(m1.__dict__, 'rundict_test_pvt') # None are skipped.
|
||||
TestResults(failed=0, attempted=8)
|
||||
"""
|
||||
assert __doc__ == r'''func placeholder - with ("""\nstring\n""")'''
|
||||
assert foo.__doc__ == r'''func placeholder - ' and with ("""\nstring\n""")'''
|
||||
assert bar.__doc__ == r"""func placeholder - ' and with ('''\nstring\n''') and \"\"\"\nstring\n\"\"\" """
|
||||
assert baz.__doc__ == \
|
||||
"""
|
||||
... '''>>> assert 1 == 1
|
||||
... '''
|
||||
... \"""
|
||||
>>> exec test_data in m1.__dict__
|
||||
>>> exec test_data in m2.__dict__
|
||||
>>> m1.__dict__.update({"f2": m2._f, "g2": m2.g, "h2": m2.H})
|
||||
|
||||
Tests that objects outside m1 are excluded:
|
||||
\"""
|
||||
>>> t.rundict(m1.__dict__, 'rundict_test_pvt') # None are skipped.
|
||||
TestResults(failed=0, attempted=8)
|
||||
"""
|
||||
|
||||
baz()
|
||||
|
@@ -80,7 +80,7 @@ def main_bin():
|
||||
timestampfmt = "# %Y.%m.%d %H:%M:%S %Z"
|
||||
|
||||
try:
|
||||
opts, pyc_paths = getopt.getopt(sys.argv[1:], 'hac:gtdrVo:p:',
|
||||
opts, pyc_paths = getopt.getopt(sys.argv[1:], 'hac:gtTdrVo:p:',
|
||||
'help asm compile= grammar linemaps recurse '
|
||||
'timestamp tree tree+ '
|
||||
'fragments verify verify-run version '
|
||||
@@ -114,7 +114,7 @@ def main_bin():
|
||||
elif opt in ('--tree', '-t'):
|
||||
options['showast'] = True
|
||||
options['do_verify'] = None
|
||||
elif opt in ('--tree+',):
|
||||
elif opt in ('--tree+', '-T'):
|
||||
options['showast'] = 'Full'
|
||||
options['do_verify'] = None
|
||||
elif opt in ('--grammar', '-g'):
|
||||
|
@@ -280,6 +280,19 @@ def main(in_base, out_base, compiled_files, source_files, outfile=None,
|
||||
sys.stdout.write("\n")
|
||||
sys.stderr.write("\nLast file: %s " % (infile))
|
||||
raise
|
||||
except RuntimeError as e:
|
||||
sys.stdout.write("\n%s\n" % str(e))
|
||||
if str(e).startswith('Unsupported Python'):
|
||||
sys.stdout.write("\n")
|
||||
sys.stderr.write("\n# Unsupported bytecode in file %s\n# %s\n" % (infile, e))
|
||||
else:
|
||||
if outfile:
|
||||
outstream.close()
|
||||
os.remove(outfile)
|
||||
sys.stdout.write("\n")
|
||||
sys.stderr.write("\nLast file: %s " % (infile))
|
||||
raise
|
||||
|
||||
# except:
|
||||
# failed_files += 1
|
||||
# if current_outfile:
|
||||
@@ -337,9 +350,9 @@ def main(in_base, out_base, compiled_files, source_files, outfile=None,
|
||||
# mem_usage = __memUsage()
|
||||
print mess, infile
|
||||
if current_outfile:
|
||||
sys.stdout.write("%s\r" %
|
||||
status_msg(do_verify, tot_files, okay_files, failed_files,
|
||||
verify_failed_files, do_verify))
|
||||
sys.stdout.write("%s -- %s\r" %
|
||||
(infile, status_msg(do_verify, tot_files, okay_files, failed_files,
|
||||
verify_failed_files, do_verify)))
|
||||
try:
|
||||
# FIXME: Something is weird with Pypy here
|
||||
sys.stdout.flush()
|
||||
|
@@ -801,7 +801,6 @@ def python_parser(version, co, out=sys.stdout, showasm=False,
|
||||
if __name__ == '__main__':
|
||||
def parse_test(co):
|
||||
from uncompyle6 import PYTHON_VERSION, IS_PYPY
|
||||
ast = python_parser('2.7.13', co, showasm=True, is_pypy=True)
|
||||
ast = python_parser(PYTHON_VERSION, co, showasm=True, is_pypy=IS_PYPY)
|
||||
print(ast)
|
||||
return
|
||||
|
@@ -650,10 +650,6 @@ class Python3Parser(PythonParser):
|
||||
# FIXME: Use the attr
|
||||
# so this doesn't run into exponential parsing time.
|
||||
if opname.startswith('BUILD_MAP_UNPACK'):
|
||||
self.add_unique_rule(rule, opname, token.attr, customize)
|
||||
rule = 'dict_entry ::= ' + 'expr ' * (token.attr*2)
|
||||
self.add_unique_rule(rule, opname, token.attr, customize)
|
||||
|
||||
# FIXME: start here. The LHS should be unmap_dict, not dict.
|
||||
# FIXME: really we need a combination of dict_entry-like things.
|
||||
# It just so happens the most common case is not to mix
|
||||
|
@@ -47,7 +47,7 @@ class Python34Parser(Python33Parser):
|
||||
|
||||
# Python 3.4+ optimizes the trailing two JUMPS away
|
||||
|
||||
# Is this 3.4 only?
|
||||
# This is 3.4 only
|
||||
yield_from ::= expr GET_ITER LOAD_CONST YIELD_FROM
|
||||
|
||||
_ifstmts_jump ::= c_stmts_opt JUMP_ABSOLUTE JUMP_FORWARD COME_FROM
|
||||
@@ -55,6 +55,7 @@ class Python34Parser(Python33Parser):
|
||||
|
||||
def customize_grammar_rules(self, tokens, customize):
|
||||
self.remove_rules("""
|
||||
yield_from ::= expr expr YIELD_FROM
|
||||
# 3.4.2 has this. 3.4.4 may now
|
||||
# while1stmt ::= SETUP_LOOP l_stmts COME_FROM JUMP_BACK COME_FROM_LOOP
|
||||
""")
|
||||
|
@@ -36,7 +36,6 @@ class AligningWalker(SourceWalker, object):
|
||||
self.pending_newlines = max(self.pending_newlines, 1)
|
||||
|
||||
def write(self, *data):
|
||||
from trepan.api import debug; debug()
|
||||
if (len(data) == 1) and data[0] == self.indent:
|
||||
diff = max(self.pending_newlines,
|
||||
self.desired_line_number - self.current_line_number)
|
||||
|
@@ -99,14 +99,9 @@ def strip_quotes(str):
|
||||
|
||||
|
||||
def print_docstring(self, indent, docstring):
|
||||
try:
|
||||
if docstring.find('"""') == -1:
|
||||
quote = '"""'
|
||||
else:
|
||||
quote = "'''"
|
||||
docstring = docstring.replace("'''", "\\'''")
|
||||
except:
|
||||
return False
|
||||
quote = '"""'
|
||||
if docstring.find("'''") == -1:
|
||||
quote = "'''"
|
||||
self.write(indent)
|
||||
if not PYTHON3 and not isinstance(docstring, str):
|
||||
# Must be unicode in Python2
|
||||
@@ -132,18 +127,31 @@ def print_docstring(self, indent, docstring):
|
||||
and (docstring[-1] != '"'
|
||||
or docstring[-2] == '\t')):
|
||||
self.write('r') # raw string
|
||||
# restore backslashes unescaped since raw
|
||||
# Restore backslashes unescaped since raw
|
||||
docstring = docstring.replace('\t', '\\')
|
||||
else:
|
||||
# Escape '"' if it's the last character, so it doesn't
|
||||
# ruin the ending triple quote
|
||||
if len(docstring) and docstring[-1] == '"':
|
||||
docstring = docstring[:-1] + '\\"'
|
||||
# Restore escaped backslashes
|
||||
|
||||
# Escape triple quote when needed
|
||||
if quote == '"""':
|
||||
if self.version > 2.7:
|
||||
replace_str = '\\"""'
|
||||
else:
|
||||
replace_str = '\\"\\"\\"'
|
||||
docstring = docstring.replace(quote, replace_str)
|
||||
else:
|
||||
assert quote == "'''"
|
||||
if self.version > 2.7:
|
||||
replace_str = "\\'''"
|
||||
else:
|
||||
replace_str = "\\'\\'\\'"
|
||||
docstring = docstring.replace(quote, replace_str)
|
||||
|
||||
docstring = docstring.replace('\t', '\\\\')
|
||||
# Escape triple quote when needed
|
||||
if quote == '""""':
|
||||
docstring = docstring.replace('"""', '\\"\\"\\"')
|
||||
|
||||
lines = docstring.split('\n')
|
||||
calculate_indent = maxint
|
||||
for line in lines[1:]:
|
||||
@@ -152,6 +160,7 @@ def print_docstring(self, indent, docstring):
|
||||
calculate_indent = min(calculate_indent, len(line) - len(stripped))
|
||||
calculate_indent = min(calculate_indent, len(lines[-1]) - len(lines[-1].lstrip()))
|
||||
# Remove indentation (first line is special):
|
||||
|
||||
trimmed = [lines[0]]
|
||||
if calculate_indent < maxint:
|
||||
trimmed += [line[calculate_indent:] for line in lines[1:]]
|
||||
@@ -164,7 +173,12 @@ def print_docstring(self, indent, docstring):
|
||||
else:
|
||||
self.println(trimmed[0])
|
||||
for line in trimmed[1:-1]:
|
||||
self.println( indent, line )
|
||||
if line:
|
||||
self.println( indent, line )
|
||||
else:
|
||||
self.println( "\n\n" )
|
||||
pass
|
||||
pass
|
||||
self.println(indent, trimmed[-1], quote)
|
||||
return True
|
||||
|
||||
|
@@ -1100,6 +1100,9 @@ class SourceWalker(GenericASTTraversal, object):
|
||||
comp_store = ast[3]
|
||||
|
||||
have_not = False
|
||||
|
||||
# Iterate to find the innermost store
|
||||
# We'll come back to the list iteration below.
|
||||
while n in ('list_iter', 'comp_iter'):
|
||||
# iterate one nesting deeper
|
||||
if self.version == 3.0 and len(n) == 3:
|
||||
@@ -1109,7 +1112,7 @@ class SourceWalker(GenericASTTraversal, object):
|
||||
n = n[0]
|
||||
|
||||
if n in ('list_for', 'comp_for'):
|
||||
if n[2] == 'store':
|
||||
if n[2] == 'store' and not store:
|
||||
store = n[2]
|
||||
n = n[3]
|
||||
elif n in ('list_if', 'list_if_not', 'comp_if', 'comp_if_not'):
|
||||
@@ -1153,11 +1156,12 @@ class SourceWalker(GenericASTTraversal, object):
|
||||
self.write(' in ')
|
||||
self.preorder(node[-3])
|
||||
|
||||
# Here is where we handle nested list iterations.
|
||||
if ast == 'list_comp' and self.version != 3.0:
|
||||
list_iter = ast[1]
|
||||
assert list_iter == 'list_iter'
|
||||
if list_iter == 'list_for':
|
||||
self.preorder(list_iter[3])
|
||||
if list_iter[0] == 'list_for':
|
||||
self.preorder(list_iter[0][3])
|
||||
self.prec = p
|
||||
return
|
||||
pass
|
||||
|
Reference in New Issue
Block a user