Merge branch 'master' into python-2.4

This commit is contained in:
rocky
2019-05-24 10:37:51 -04:00
17 changed files with 103 additions and 126 deletions

View File

@@ -93,8 +93,8 @@ This uses setup.py, so it follows the standard Python routine:
A GNU makefile is also provided so :code:`make install` (possibly as root or
sudo) will do the steps above.
Testing
-------
Running Tests
-------------
::
@@ -133,18 +133,8 @@ You can also cross compare the results with pycdc_ . Since they work
differently, bugs here often aren't in that, and vice versa.
Known Bugs/Restrictions
-----------------------
The biggest known and possibly fixable (but hard) problem has to do
with handling control flow. (Python has probably the most diverse and
screwy set of compound statements I've ever seen; there
are "else" clauses on loops and try blocks that I suspect many
programmers don't know about.)
All of the Python decompilers that I have looked at have problems
decompiling Python's control flow. In some cases we can detect an
erroneous decompilation and report that.
Verification
------------
In older versions of Python it was possible to verify bytecode by
decompiling bytecode, and then compiling using the Python interpreter
@@ -167,6 +157,19 @@ And already Python has a set of programs like this: the test suite
for the standard library that comes with Python. We have some
code in `test/stdlib` to facilitate this kind of checking.
Known Bugs/Restrictions
-----------------------
The biggest known and possibly fixable (but hard) problem has to do
with handling control flow. (Python has probably the most diverse and
screwy set of compound statements I've ever seen; there
are "else" clauses on loops and try blocks that I suspect many
programmers don't know about.)
All of the Python decompilers that I have looked at have problems
decompiling Python's control flow. In some cases we can detect an
erroneous decompilation and report that.
Python support is strongest in Python 2 for 2.7 and drops off as you
get further away from that. Support is also probably pretty good for
python 2.3-2.4 since a lot of the goodness of early the version of the
@@ -222,7 +225,7 @@ See Also
* https://github.com/zrax/pycdc : purports to support all versions of Python. It is written in C++ and is most accurate for Python versions around 2.7 and 3.3 when the code was more actively developed. Accuracy for more recent versions of Python 3 and early versions of Python are especially lacking. See its `issue tracker <https://github.com/zrax/pycdc/issues>`_ for details. Currently lightly maintained.
* https://code.google.com/archive/p/unpyc3/ : supports Python 3.2 only. The above projects use a different decompiling technique than what is used here. Currently unmaintained.
* https://github.com/figment/unpyc3/ : fork of above, but supports Python 3.3 only. Includes some fixes like supporting function annotations. Currently unmaintained.
* https://github.com/wibiti/uncompyle2 : supports Python 2.7 only, but does that fairly well. There are situtations where `uncompyle6` results are incorrect while `uncompyle2` results are not, but more often uncompyle6 is correct when uncompyle2 is not. Because `uncompyle6` adheres to accuracy over idiomatic Python, `uncompyle2` can produce more natural-looking code when it is correct. Currently `uncompyle2` is lightly maintained. See its issue `tracker <https://github.com/wibiti/uncompyle2/issues>`_ for more details
* https://github.com/wibiti/uncompyle2 : supports Python 2.7 only, but does that fairly well. There are situations where `uncompyle6` results are incorrect while `uncompyle2` results are not, but more often uncompyle6 is correct when uncompyle2 is not. Because `uncompyle6` adheres to accuracy over idiomatic Python, `uncompyle2` can produce more natural-looking code when it is correct. Currently `uncompyle2` is lightly maintained. See its issue `tracker <https://github.com/wibiti/uncompyle2/issues>`_ for more details
* `How to report a bug <https://github.com/rocky/python-uncompyle6/blob/master/HOW-TO-REPORT-A-BUG.md>`_
* The HISTORY_ file.
* https://github.com/rocky/python-xdis : Cross Python version disassembler

View File

@@ -1,78 +0,0 @@
import sys
from uncompyle6 import PYTHON3
if PYTHON3:
from io import StringIO
minint = -sys.maxsize-1
maxint = sys.maxsize
else:
from StringIO import StringIO
minint = -sys.maxint-1
maxint = sys.maxint
from uncompyle6.semantics.helper import print_docstring
class PrintFake:
def __init__(self):
self.pending_newlines = 0
self.f = StringIO()
def write(self, *data):
if (len(data) == 0) or (len(data) == 1 and data[0] == ''):
return
out = ''.join((str(j) for j in data))
n = 0
for i in out:
if i == '\n':
n += 1
if n == len(out):
self.pending_newlines = max(self.pending_newlines, n)
return
elif n:
self.pending_newlines = max(self.pending_newlines, n)
out = out[n:]
break
else:
break
if self.pending_newlines > 0:
self.f.write('\n'*self.pending_newlines)
self.pending_newlines = 0
for i in out[::-1]:
if i == '\n':
self.pending_newlines += 1
else:
break
if self.pending_newlines:
out = out[:-self.pending_newlines]
self.f.write(out)
def println(self, *data):
if data and not(len(data) == 1 and data[0] == ''):
self.write(*data)
self.pending_newlines = max(self.pending_newlines, 1)
return
pass
def test_docstring():
for doc, expect in (
("Now is the time",
' """Now is the time"""'),
("""
Now is the time
""",
''' """
Now is the time
"""''')
# (r'''func placeholder - ' and with ("""\nstring\n """)''',
# """ r'''func placeholder - ' and with (\"\"\"\nstring\n\"\"\")'''"""),
# (r"""func placeholder - ' and with ('''\nstring\n''') and \"\"\"\nstring\n\"\"\" """,
# """ r\"\"\"func placeholder - ' and with ('''\nstring\n''') and \"\"\"\nstring\n\"\"\" \"\"\"""")
):
o = PrintFake()
# print(doc)
# print(expect)
print_docstring(o, ' ', doc)
assert expect == o.f.getvalue()

View File

@@ -177,7 +177,7 @@ grammar-coverage-2.6:
grammar-coverage-2.7:
-rm $(COVER_DIR)/spark-grammar-2.7.cover || true
SPARK_PARSER_COVERAGE=$(COVER_DIR)/spark-grammar-2.7.cover $(PYTHON) test_pythonlib.py --bytecode-2.7
SPARK_PARSER_COVERAGE=$(COVER_DIR)/spark-grammar-2.7.cover $(PYTHON) test_pyenvlib.py --2.7.14 --max=600
SPARK_PARSER_COVERAGE=$(COVER_DIR)/spark-grammar-2.7.cover $(PYTHON) test_pyenvlib.py --2.7.16 --max=600
#: Get grammar coverage for Python 3.0
grammar-coverage-3.0:
@@ -220,7 +220,12 @@ grammar-coverage-3.5:
grammar-coverage-3.6:
rm $(COVER_DIR)/spark-grammar-3.6.cover || /bin/true
SPARK_PARSER_COVERAGE=$(COVER_DIR)/spark-grammar-3.6.cover $(PYTHON) test_pythonlib.py --bytecode-3.6
SPARK_PARSER_COVERAGE=$(COVER_DIR)/spark-grammar-3.6.cover $(PYTHON) test_pyenvlib.py --3.6.4 --max=280
SPARK_PARSER_COVERAGE=$(COVER_DIR)/spark-grammar-3.6.cover $(PYTHON) test_pyenvlib.py --3.6.8 --max=280
#: Get grammar coverage for Python 3.7
grammar-coverage-3.7:
rm $(COVER_DIR)/spark-grammar-3.7.cover || /bin/true
SPARK_PARSER_COVERAGE=$(COVER_DIR)/spark-grammar-3.7.cover $(PYTHON) test_pyenvlib.py --3.7.3 --max=500
#: Check deparsing Python 2.6
check-bytecode-2.6:

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -1,7 +1,7 @@
#!/bin/bash
# Remake Python grammar statistics
typeset -A ALL_VERS=([2.4]=2.4.6 [2.5]=2.5.6 [2.6]=2.6.9 [2.7]=2.7.14 [3.2]=3.2.6 [3.3]=3.3.6 [3.4]=3.4.8 [3.5]=3.5.5 [3.6]=3.6.4)
typeset -A ALL_VERS=([2.4]=2.4.6 [2.5]=2.5.6 [2.6]=2.6.9 [2.7]=2.7.16 [3.2]=3.2.6 [3.3]=3.3.6 [3.4]=3.4.8 [3.5]=3.5.6 [3.6]=3.6.8, [3.7]=3.7.3)
if (( $# == 0 )); then
echo 1>&2 "usage: $0 two-digit-version"

View File

@@ -1,4 +1,6 @@
# uncompyle2 bug was not escaping """ properly
# RUNNABLE!
r'''func placeholder - with ("""\nstring\n""")'''
def foo():
r'''func placeholder - ' and with ("""\nstring\n""")'''
@@ -20,3 +22,22 @@ def baz():
>>> t.rundict(m1.__dict__, 'rundict_test_pvt') # None are skipped.
TestResults(failed=0, attempted=8)
"""
assert __doc__ == r'''func placeholder - with ("""\nstring\n""")'''
assert foo.__doc__ == r'''func placeholder - ' and with ("""\nstring\n""")'''
assert bar.__doc__ == r"""func placeholder - ' and with ('''\nstring\n''') and \"\"\"\nstring\n\"\"\" """
assert baz.__doc__ == \
"""
... '''>>> assert 1 == 1
... '''
... \"""
>>> exec test_data in m1.__dict__
>>> exec test_data in m2.__dict__
>>> m1.__dict__.update({"f2": m2._f, "g2": m2.g, "h2": m2.H})
Tests that objects outside m1 are excluded:
\"""
>>> t.rundict(m1.__dict__, 'rundict_test_pvt') # None are skipped.
TestResults(failed=0, attempted=8)
"""
baz()

View File

@@ -80,7 +80,7 @@ def main_bin():
timestampfmt = "# %Y.%m.%d %H:%M:%S %Z"
try:
opts, pyc_paths = getopt.getopt(sys.argv[1:], 'hac:gtdrVo:p:',
opts, pyc_paths = getopt.getopt(sys.argv[1:], 'hac:gtTdrVo:p:',
'help asm compile= grammar linemaps recurse '
'timestamp tree tree+ '
'fragments verify verify-run version '
@@ -114,7 +114,7 @@ def main_bin():
elif opt in ('--tree', '-t'):
options['showast'] = True
options['do_verify'] = None
elif opt in ('--tree+',):
elif opt in ('--tree+', '-T'):
options['showast'] = 'Full'
options['do_verify'] = None
elif opt in ('--grammar', '-g'):

View File

@@ -280,6 +280,19 @@ def main(in_base, out_base, compiled_files, source_files, outfile=None,
sys.stdout.write("\n")
sys.stderr.write("\nLast file: %s " % (infile))
raise
except RuntimeError as e:
sys.stdout.write("\n%s\n" % str(e))
if str(e).startswith('Unsupported Python'):
sys.stdout.write("\n")
sys.stderr.write("\n# Unsupported bytecode in file %s\n# %s\n" % (infile, e))
else:
if outfile:
outstream.close()
os.remove(outfile)
sys.stdout.write("\n")
sys.stderr.write("\nLast file: %s " % (infile))
raise
# except:
# failed_files += 1
# if current_outfile:
@@ -337,9 +350,9 @@ def main(in_base, out_base, compiled_files, source_files, outfile=None,
# mem_usage = __memUsage()
print mess, infile
if current_outfile:
sys.stdout.write("%s\r" %
status_msg(do_verify, tot_files, okay_files, failed_files,
verify_failed_files, do_verify))
sys.stdout.write("%s -- %s\r" %
(infile, status_msg(do_verify, tot_files, okay_files, failed_files,
verify_failed_files, do_verify)))
try:
# FIXME: Something is weird with Pypy here
sys.stdout.flush()

View File

@@ -801,7 +801,6 @@ def python_parser(version, co, out=sys.stdout, showasm=False,
if __name__ == '__main__':
def parse_test(co):
from uncompyle6 import PYTHON_VERSION, IS_PYPY
ast = python_parser('2.7.13', co, showasm=True, is_pypy=True)
ast = python_parser(PYTHON_VERSION, co, showasm=True, is_pypy=IS_PYPY)
print(ast)
return

View File

@@ -650,10 +650,6 @@ class Python3Parser(PythonParser):
# FIXME: Use the attr
# so this doesn't run into exponential parsing time.
if opname.startswith('BUILD_MAP_UNPACK'):
self.add_unique_rule(rule, opname, token.attr, customize)
rule = 'dict_entry ::= ' + 'expr ' * (token.attr*2)
self.add_unique_rule(rule, opname, token.attr, customize)
# FIXME: start here. The LHS should be unmap_dict, not dict.
# FIXME: really we need a combination of dict_entry-like things.
# It just so happens the most common case is not to mix

View File

@@ -47,7 +47,7 @@ class Python34Parser(Python33Parser):
# Python 3.4+ optimizes the trailing two JUMPS away
# Is this 3.4 only?
# This is 3.4 only
yield_from ::= expr GET_ITER LOAD_CONST YIELD_FROM
_ifstmts_jump ::= c_stmts_opt JUMP_ABSOLUTE JUMP_FORWARD COME_FROM
@@ -55,6 +55,7 @@ class Python34Parser(Python33Parser):
def customize_grammar_rules(self, tokens, customize):
self.remove_rules("""
yield_from ::= expr expr YIELD_FROM
# 3.4.2 has this. 3.4.4 may now
# while1stmt ::= SETUP_LOOP l_stmts COME_FROM JUMP_BACK COME_FROM_LOOP
""")

View File

@@ -36,7 +36,6 @@ class AligningWalker(SourceWalker, object):
self.pending_newlines = max(self.pending_newlines, 1)
def write(self, *data):
from trepan.api import debug; debug()
if (len(data) == 1) and data[0] == self.indent:
diff = max(self.pending_newlines,
self.desired_line_number - self.current_line_number)

View File

@@ -99,14 +99,9 @@ def strip_quotes(str):
def print_docstring(self, indent, docstring):
try:
if docstring.find('"""') == -1:
quote = '"""'
else:
if docstring.find("'''") == -1:
quote = "'''"
docstring = docstring.replace("'''", "\\'''")
except:
return False
self.write(indent)
if not PYTHON3 and not isinstance(docstring, str):
# Must be unicode in Python2
@@ -132,18 +127,31 @@ def print_docstring(self, indent, docstring):
and (docstring[-1] != '"'
or docstring[-2] == '\t')):
self.write('r') # raw string
# restore backslashes unescaped since raw
# Restore backslashes unescaped since raw
docstring = docstring.replace('\t', '\\')
else:
# Escape '"' if it's the last character, so it doesn't
# ruin the ending triple quote
if len(docstring) and docstring[-1] == '"':
docstring = docstring[:-1] + '\\"'
# Restore escaped backslashes
docstring = docstring.replace('\t', '\\\\')
# Escape triple quote when needed
if quote == '""""':
docstring = docstring.replace('"""', '\\"\\"\\"')
if quote == '"""':
if self.version > 2.7:
replace_str = '\\"""'
else:
replace_str = '\\"\\"\\"'
docstring = docstring.replace(quote, replace_str)
else:
assert quote == "'''"
if self.version > 2.7:
replace_str = "\\'''"
else:
replace_str = "\\'\\'\\'"
docstring = docstring.replace(quote, replace_str)
docstring = docstring.replace('\t', '\\\\')
lines = docstring.split('\n')
calculate_indent = maxint
for line in lines[1:]:
@@ -152,6 +160,7 @@ def print_docstring(self, indent, docstring):
calculate_indent = min(calculate_indent, len(line) - len(stripped))
calculate_indent = min(calculate_indent, len(lines[-1]) - len(lines[-1].lstrip()))
# Remove indentation (first line is special):
trimmed = [lines[0]]
if calculate_indent < maxint:
trimmed += [line[calculate_indent:] for line in lines[1:]]
@@ -164,7 +173,12 @@ def print_docstring(self, indent, docstring):
else:
self.println(trimmed[0])
for line in trimmed[1:-1]:
if line:
self.println( indent, line )
else:
self.println( "\n\n" )
pass
pass
self.println(indent, trimmed[-1], quote)
return True

View File

@@ -1100,6 +1100,9 @@ class SourceWalker(GenericASTTraversal, object):
comp_store = ast[3]
have_not = False
# Iterate to find the innermost store
# We'll come back to the list iteration below.
while n in ('list_iter', 'comp_iter'):
# iterate one nesting deeper
if self.version == 3.0 and len(n) == 3:
@@ -1109,7 +1112,7 @@ class SourceWalker(GenericASTTraversal, object):
n = n[0]
if n in ('list_for', 'comp_for'):
if n[2] == 'store':
if n[2] == 'store' and not store:
store = n[2]
n = n[3]
elif n in ('list_if', 'list_if_not', 'comp_if', 'comp_if_not'):
@@ -1153,11 +1156,12 @@ class SourceWalker(GenericASTTraversal, object):
self.write(' in ')
self.preorder(node[-3])
# Here is where we handle nested list iterations.
if ast == 'list_comp' and self.version != 3.0:
list_iter = ast[1]
assert list_iter == 'list_iter'
if list_iter == 'list_for':
self.preorder(list_iter[3])
if list_iter[0] == 'list_for':
self.preorder(list_iter[0][3])
self.prec = p
return
pass