mirror of https://github.com/rocky/python-uncompyle6.git

Compare commits: 36 commits (release-2. ... release-2.)
Commits in this comparison (SHA1):

da50394841
13d5cd1a58
08dcc7d820
7755563b65
b43cbc050d
db7a26d47d
92166452c1
96fa3ef381
755415c7d8
b168e1de55
38eed14b41
2c993f8c32
65858a4c74
263c63e009
813bce4697
a5d2237435
d22931cb49
9cc2700160
a5a0f45dde
3c02fa7e36
0d0f836f76
69c93cc665
97576e473d
1e324e0e8d
7ab4e1fbdb
abecb21671
8be6369bdf
8941417a54
cbcfd53dae
df2ca51f4a
4f4069c6b5
6aa1531972
4fcb385dc0
260ddedbfd
f8917aaf88
c8550d5c9e
@@ -8,6 +8,7 @@ python:
 - '2.6'
 - '3.3'
 - '3.4'
 - '3.2'

install:
 - pip install -r requirements.txt
101 ChangeLog
@@ -1,6 +1,105 @@
2016-12-04  rocky  <rb@dustyfeet.com>

    * uncompyle6/version.py: Get ready for release 2.9.7

2016-11-28  rocky  <rb@dustyfeet.com>

    * uncompyle6/parsers/parse3.py, uncompyle6/parsers/parse36.py:
    Shorten Python3 grammars with + and *

2016-11-28  rocky  <rb@dustyfeet.com>

    * __pkginfo__.py, uncompyle6/parser.py,
    uncompyle6/parsers/parse2.py: Try new spark 2.5.1 grammar syntax
    shortcuts. This package I now declare stable.

2016-11-28  R. Bernstein  <rocky@users.noreply.github.com>

    * README.rst: Update README.rst

2016-11-27  rocky  <rb@dustyfeet.com>

    * README.rst: Limitations of decompiling control structures.

2016-11-27  R. Bernstein  <rocky@users.noreply.github.com>

    * : Merge pull request #69 from rocky/ast-reduce-checks: AST reduce checks

2016-11-26  rocky  <rb@dustyfeet.com>

    * test/simple_source/bug26/03_elif_vs_continue.py,
    uncompyle6/main.py, uncompyle6/parser.py,
    uncompyle6/parsers/parse2.py, uncompyle6/scanners/scanner2.py,
    uncompyle6/scanners/scanner26.py: Misc changes.
    scanner26.py: make scanner2.py and scanner26.py more alike.
    scanner2.py: check that a return stmt is last in the list. (May change.)
    main.py: show filename on verify error. test/*: add more.

2016-11-25  rocky  <rb@dustyfeet.com>

    * __pkginfo__.py, test/Makefile, uncompyle6/parser.py,
    uncompyle6/parsers/parse2.py, uncompyle6/parsers/parse3.py: Start
    grammar reduction checks

2016-11-24  rocky  <rb@dustyfeet.com>

    * uncompyle6/parsers/parse27.py, uncompyle6/scanners/scanner2.py,
    uncompyle6/semantics/helper.py, uncompyle6/semantics/pysource.py:
    2.7 grammar bug workaround. Fix docstring bug.

2016-11-24  rocky  <rb@dustyfeet.com>

    * uncompyle6/semantics/pysource.py: Better line number tracking.
    Indent Python 2 list comprehensions, albeit badly. DRY code a
    little via indent_if_source_nl.

2016-11-24  rocky  <rb@dustyfeet.com>

    * uncompyle6/parsers/parse3.py, uncompyle6/scanners/scanner2.py:
    <2.7 "if" detection and dup Python 3 grammar rule

2016-11-23  rocky  <rb@dustyfeet.com>

    * __pkginfo__.py, pytest/test_grammar.py, uncompyle6/parser.py,
    uncompyle6/parsers/parse26.py: Python 2.6 grammar bug and...
    __pkginfo__.py: Bump spark_parser version for parse_flags 'dups'.

2016-11-23  rocky  <rb@dustyfeet.com>

    * __pkginfo__.py: Note that we now work on 2.4 and 2.5

2016-11-23  rocky  <rb@dustyfeet.com>

    * : commit 6aa1531972de83ecab15b4c96b89c873ea5a7458 Author: rocky
    <rb@dustyfeet.com> Date: Wed Nov 23 00:48:38 2016 -0500

2016-11-22  rocky  <rb@dustyfeet.com>

    * uncompyle6/parsers/parse3.py, uncompyle6/parsers/parse32.py,
    uncompyle6/parsers/parse33.py, uncompyle6/parsers/parse34.py,
    uncompyle6/parsers/parse35.py: DRY Python3 grammar

2016-11-22  rocky  <rb@dustyfeet.com>

    * uncompyle6/parsers/parse2.py, uncompyle6/parsers/parse27.py,
    uncompyle6/scanners/scanner2.py: More detailed COME_FROMs.
    For now we only add COME_FROM_FINALLY and COME_FROM_WITH, and even
    here only on 2.7.

2016-11-22  rocky  <rb@dustyfeet.com>

    * circle.yml, pytest/test_grammar.py, tox.ini,
    uncompyle6/parser.py, uncompyle6/parsers/parse2.py,
    uncompyle6/parsers/parse27.py: Remove redundant 2.7 (and 2.x)
    grammar rules

2016-11-22  rocky  <rb@dustyfeet.com>

    * pytest/test_docstring.py, uncompyle6/linenumbers.py,
    uncompyle6/semantics/fragments.py, uncompyle6/semantics/helper.py,
    uncompyle6/semantics/make_function.py,
    uncompyle6/semantics/pysource.py: Split out print_docstring;
    move it from pysource.py to the new helper.py.

2016-11-20  rocky  <rb@dustyfeet.com>

    * uncompyle6/version.py: Get ready for release 2.9.6
    * ChangeLog, NEWS, uncompyle6/version.py: Get ready for release
    2.9.6

2016-11-20  R. Bernstein  <rocky@users.noreply.github.com>
18 NEWS
@@ -1,3 +1,21 @@
uncompyle6 2.9.7 2016-12-16

- Start to handle 3.5/3.6 build_map_unpack_with_call
- Some Python 3.6 bytecode-to-wordcode conversion fixes
- option -g: show start-end range when possible
- track print_docstring move to helper (used in Python 3.1)
- verify: allow RETURN_VALUE to match RETURN_END_IF
- some 3.2 compatibility
- Better Python 3 control flow detection by adding pseudo ELSE opcodes

uncompyle6 2.9.6 2016-12-04

- Shorten Python3 grammars with + and *;
  this requires spark parser 1.5.1
- Add some AST reduction checks to improve
  decompile accuracy. This too requires
  spark parser 1.5.1

uncompyle6 2.9.6 2016-11-20

- Correct MANIFEST.in
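The + and * mentioned above are the spark parser's extended-BNF
shortcuts: "x+" means one or more x, "x*" zero or more, and "x?" an
optional x. As the uncompyle6/parser.py diff later on this page shows,
a recursive rule pair such as

    _stmts ::= _stmts stmt
    _stmts ::= stmt

collapses to the single rule

    _stmts ::= stmt+

and _come_from / come_from_opt likewise become COME_FROM* and COME_FROM?.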
25 README.rst
@@ -43,7 +43,8 @@ information.
Requirements
------------

This project requires Python 2.6 or later, PyPy 3-2.4, or PyPy-5.0.1.
This project requires Python 2.6 or later, PyPy 3-2.4, or PyPy-5.0.1.
Python versions 2.3-2.7 are supported in the python-2.4 branch.
The bytecode files it can read has been tested on Python bytecodes from
versions 2.1-2.7, and 3.2-3.6 and the above-mentioned PyPy versions.

@@ -97,7 +98,8 @@ Known Bugs/Restrictions
-----------------------

The biggest known and possibly fixable (but hard) problem has to do
with handling control flow. In some cases we can detect an erroneous
with handling control flow. All of the Python decompilers I have looked
at have the same problem. In some cases we can detect an erroneous
decompilation and report that.

About 90% of the decompilation of Python standard library packages in

@@ -109,14 +111,17 @@ Other versions drop off in quality too.
a Python for that bytecode version, and then comparing the bytecode
produced by the decompiled/compiled program. Some allowance is made
for inessential differences. But other semantically equivalent
differences are not caught. For example ``if x: foo()`` is
equivalent to ``x and foo()`` and decompilation may turn one into the
other. *Weak Verification* on the other hand doesn't check bytecode
for equivalence but does check to see if the resulting decompiled
source is a valid Python program by running the Python
interpreter. Because the Python language has changed so much, for best
results you should use the same Python Version in checking as used in
the bytecode.
differences are not caught. For example ``1 and 0`` is decompiled to
the equivalent ``0``; remnants of the first true evaluation (1) is
lost when Python compiles this. When Python next compiles ``0`` the
resulting code is simpler.

*Weak Verification*
on the other hand doesn't check bytecode for equivalence but does
check to see if the resulting decompiled source is a valid Python
program by running the Python interpreter. Because the Python language
has changed so much, for best results you should use the same Python
Version in checking as used in the bytecode.

Later distributions average about 200 files. There is some work to do
on the lower end Python versions which is more difficult for us to
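In spirit, weak verification just checks that the decompiled source
still compiles. A minimal sketch of the idea (not the project's actual
driver, which lives in test/test_pythonlib.py):

    # weak_verify.py -- does the decompiled source at least compile?
    import py_compile
    import sys

    try:
        py_compile.compile(sys.argv[1], doraise=True)
        print("weak verify: OK")
    except py_compile.PyCompileError as e:
        print("weak verify failed:", e)
        sys.exit(1)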
@@ -12,14 +12,16 @@ copyright = """
Copyright (C) 2015, 2016 Rocky Bernstein <rb@dustyfeet.com>.
"""

classifiers = ['Development Status :: 4 - Beta',
classifiers = ['Development Status :: 5 - Production/Stable',
               'Intended Audience :: Developers',
               'Operating System :: OS Independent',
               'Programming Language :: Python',
               'Programming Language :: Python :: 2',
               'Programming Language :: Python :: 2.4',
               'Programming Language :: Python :: 2.5',
               'Programming Language :: Python :: 2.6',
               'Programming Language :: Python :: 2.7',
               'Programming Language :: Python :: 3',
               'Programming Language :: Python :: 3.1',
               'Programming Language :: Python :: 3.2',
               'Programming Language :: Python :: 3.3',
               'Programming Language :: Python :: 3.4',
               'Programming Language :: Python :: 3.5',

@@ -37,8 +39,8 @@ entry_points={
    'pydisassemble=uncompyle6.bin.pydisassemble:main',
]}
ftp_url = None
install_requires = ['spark-parser >= 1.4.0, < 1.5.0',
                    'xdis >= 3.2.3, < 3.3.0']
install_requires = ['spark-parser >= 1.5.1, < 1.6.0',
                    'xdis >= 3.2.4, < 3.3.0']
license = 'MIT'
mailing_list = 'python-debugger@googlegroups.com'
modname = 'uncompyle6'
@@ -1,6 +1,6 @@
machine:
  python:
    version: 2.7.8
    version: 2.7.10
  environment:
    COMPILE: --compile
78 pytest/test_docstring.py (new file)
@@ -0,0 +1,78 @@
import sys
from uncompyle6 import PYTHON3
if PYTHON3:
    from io import StringIO
    minint = -sys.maxsize-1
    maxint = sys.maxsize
else:
    from StringIO import StringIO
    minint = -sys.maxint-1
    maxint = sys.maxint
from uncompyle6.semantics.helper import print_docstring

class PrintFake():
    def __init__(self):
        self.pending_newlines = 0
        self.f = StringIO()

    def write(self, *data):
        if (len(data) == 0) or (len(data) == 1 and data[0] == ''):
            return
        out = ''.join((str(j) for j in data))
        n = 0
        for i in out:
            if i == '\n':
                n += 1
                if n == len(out):
                    self.pending_newlines = max(self.pending_newlines, n)
                    return
            elif n:
                self.pending_newlines = max(self.pending_newlines, n)
                out = out[n:]
                break
            else:
                break

        if self.pending_newlines > 0:
            self.f.write('\n'*self.pending_newlines)
            self.pending_newlines = 0

        for i in out[::-1]:
            if i == '\n':
                self.pending_newlines += 1
            else:
                break

        if self.pending_newlines:
            out = out[:-self.pending_newlines]
        self.f.write(out)

    def println(self, *data):
        if data and not(len(data) == 1 and data[0] == ''):
            self.write(*data)
        self.pending_newlines = max(self.pending_newlines, 1)
        return
    pass

def test_docstring():

    for doc, expect in (
        ("Now is the time",
         '    """Now is the time"""'),
        ("""
Now is the time
""",
         '''    """
    Now is the time
    """''')

        # (r'''func placeholder - ' and with ("""\nstring\n """)''',
        #  """    r'''func placeholder - ' and with (\"\"\"\nstring\n\"\"\")'''"""),
        # (r"""func placeholder - ' and with ('''\nstring\n''') and \"\"\"\nstring\n\"\"\" """,
        #  """    r\"\"\"func placeholder - ' and with ('''\nstring\n''') and \"\"\"\nstring\n\"\"\" \"\"\"""")
    ):

        o = PrintFake()
        # print(doc)
        # print(expect)
        print_docstring(o, '    ', doc)
        assert expect == o.f.getvalue()
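Assuming a stock pytest setup, the two pytest files touched in this
comparison can be run straight from the repository root
(test_grammar.py follows next):

    pytest pytest/test_docstring.py pytest/test_grammar.py -q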
@@ -1,6 +1,6 @@
import re
from uncompyle6 import PYTHON_VERSION, PYTHON3, IS_PYPY  # , PYTHON_VERSION
from uncompyle6.parser import get_python_parser
from uncompyle6.parser import get_python_parser, python_parser
from uncompyle6.scanner import get_scanner

def test_grammar():

@@ -41,7 +41,7 @@ def test_grammar():
        """
        JUMP_BACK CONTINUE RETURN_END_IF
        COME_FROM COME_FROM_EXCEPT COME_FROM_LOOP COME_FROM_WITH
        COME_FROM_FINALLY
        COME_FROM_FINALLY ELSE
        LOAD_GENEXPR LOAD_ASSERT LOAD_SETCOMP LOAD_DICTCOMP
        LAMBDA_MARKER RETURN_LAST
        """.split())

@@ -53,3 +53,11 @@ def test_grammar():
        ignore_set.add('STORE_LOCALS')
    opcode_set = set(s.opc.opname).union(ignore_set)
    check_tokens(tokens, opcode_set)

def test_dup_rule():
    import inspect
    python_parser(PYTHON_VERSION, inspect.currentframe().f_code,
                  is_pypy=IS_PYPY,
                  parser_debug={
                      'dups': True, 'transition': False, 'reduce': False,
                      'rules': False, 'errorstack': None, 'context': True})
@@ -22,9 +22,9 @@ check:
#: Run working tests from Python 2.6 or 2.7
check-2.6 check-2.7: check-bytecode-2 check-bytecode-3 check-bytecode-1 check-2.7-ok

#: Run working tests from Python 3.1
#: Run working tests from Python 3.0
check-3.0: check-bytecode
	@echo Python 3.0 testing not done yet
	$(PYTHON) test_pythonlib.py --bytecode-3.0 --weak-verify $(COMPILE)

#: Run working tests from Python 3.1
check-3.1: check-bytecode

@@ -36,11 +36,11 @@ check-3.2: check-bytecode

#: Run working tests from Python 3.3
check-3.3: check-bytecode
	$(PYTHON) test_pythonlib.py --bytecode-3.3 --weak-verify $(COMPILE)
	$(PYTHON) test_pythonlib.py --bytecode-3.3 --verify $(COMPILE)

#: Run working tests from Python 3.4
check-3.4: check-bytecode check-3.4-ok check-2.7-ok
	$(PYTHON) test_pythonlib.py --bytecode-3.4 --weak-verify $(COMPILE)
	$(PYTHON) test_pythonlib.py --bytecode-3.4 --verify $(COMPILE)

#: Run working tests from Python 3.5
check-3.5: check-bytecode

@@ -104,7 +104,7 @@ check-bytecode-2.6:

#: Check deparsing Python 2.7
check-bytecode-2.7:
	$(PYTHON) test_pythonlib.py --bytecode-2.7
	$(PYTHON) test_pythonlib.py --bytecode-2.7 --verify

#: Check deparsing Python 3.0
check-bytecode-3.0:
Binary file not shown.
BIN  test/bytecode_2.6/03_elif_vs_continue.pyc  (new file; binary not shown)
BIN  test/bytecode_3.3/03_while_else.pyc  (new file; binary not shown)
BIN  test/bytecode_3.5/02_build_map_unpack_with_call.pyc  (new file; binary not shown)
Binary file not shown.
Binary file not shown.
18 test/simple_source/bug26/03_elif_vs_continue.py (new file)
@@ -0,0 +1,18 @@
# Bug was "continue" fouling up the 1st "elif", by confusing
# the "pass" for "continue" by not recognizing the if jump
# around it. We fixed this by ignoring what's done in Python 2.7.
# Better still would be better detection of control structures.

def _compile_charset(charset, flags, code, fixup=None):
    # compile charset subprogram
    emit = code.append
    if fixup is None:
        fixup = 1
    for op, av in charset:
        if op is flags:
            pass
        elif op is code:
            emit(fixup(av))
        else:
            raise RuntimeError
    emit(5)
8 test/simple_source/bug33/03_while_else.py (new file)
@@ -0,0 +1,8 @@
# Bug from 3.4 threading. Bug is handling while/else
def acquire(self):
    with self._cond:
        while self:
            rc = False
        else:
            rc = True
        return rc
@@ -0,0 +1 @@
f(**a, **b)
9 tox.ini
@@ -6,13 +6,14 @@ filename = *.py
ignore = C901,E113,E121,E122,E123,E124,E125,E126,E127,E128,E129,E201,E202,E203,E221,E222,E225,E226,E241,E242,E251,E261,E271,E272,E302,E401,E501,F401,E701,E702

[tox]
envlist = py26, py27, pypy
envlist = py27, py34, pypy

[testenv]
deps =
    requests>=0.8.8
    mock>=1.0.1
commands = python -W always setup.py nosetests {posargs}
    hypothesis
    pytest
    flake8
commands = python -W always make test {posargs}

[testenv:py27]
deps =
@@ -66,9 +66,9 @@ def usage():

def main_bin():
    if not (sys.version_info[0:2] in ((2, 6), (2, 7),
                                      (3, 2), (3, 3),
                                      (3, 1), (3, 2), (3, 3),
                                      (3, 4), (3, 5), (3, 6))):
        print('Error: %s requires Python 2.6, 2.7, 3.2, 3.3, 3.4, 3.5, or 3.6' % program,
        print('Error: %s requires Python 2.6-2.7, or 3.1-3.6' % program,
              file=sys.stderr)
        sys.exit(-1)

@@ -142,7 +142,6 @@ def main_bin():
    if src_base:
        sb_len = len( os.path.join(src_base, '') )
        files = [f[sb_len:] for f in files]
        del sb_len

    if not files:
        print("No files given", file=sys.stderr)
@@ -1,4 +1,4 @@
from collections import deque, namedtuple
from collections import deque

from xdis.code import iscode
from xdis.load import load_file, load_module

@@ -49,7 +49,6 @@ def uncompyle(
        raise pysource.SourceWalkerError(str(e))


def uncompyle_file(filename, outstream=None, showasm=None, showast=False,
                   showgrammar=False):
    """
@@ -61,7 +60,6 @@ def uncompyle_file(filename, outstream=None, showasm=None, showast=False,
    (version, timestamp, magic_int, co, is_pypy,
     source_size) = load_module(filename, code_objects)


    if type(co) == list:
        for con in co:
            uncompyle(version, con, outstream, showasm, showast,
@@ -189,17 +187,16 @@ def main(in_base, out_base, files, codes, outfile=None,
                print(e)
                verify_failed_files += 1
                os.rename(outfile, outfile + '_unverified')
                sys.stderr.write("### Error Verifying %s\n" % filename)
                sys.stderr.write(str(e) + "\n")
                if not outfile:
                    print("### Error Verifiying %s" % filename, file=sys.stderr)
                    print(e, file=sys.stderr)
                    if raise_on_error:
                        raise
                    pass
                pass
            pass
        elif do_verify:
            print("\n### uncompile successful, but no file to compare against",
                  file=sys.stderr)
            sys.stderr.write("\n### uncompile successful, but no file to compare against\n")
            pass
        else:
            okay_files += 1
@@ -69,6 +69,33 @@ class PythonParser(GenericASTBuilder):
        for i in dir(self):
            setattr(self, i, None)

    def debug_reduce(self, rule, tokens, parent, i):
        """Customized format and print for our kind of tokens
        which gets called in debugging grammar reduce rules
        """
        def fix(c):
            s = str(c)
            i = s.find('_')
            return s if i == -1 else s[:i]

        prefix = ''
        if parent and tokens:
            p_token = tokens[parent]
            if hasattr(p_token, 'linestart') and p_token.linestart:
                prefix = 'L.%3d: ' % p_token.linestart
            else:
                prefix = '       '
            if hasattr(p_token, 'offset'):
                prefix += "%3s" % fix(p_token.offset)
                if len(rule[1]) > 1:
                    prefix += '-%-3s ' % fix(tokens[i-1].offset)
                else:
                    prefix += '     '
        else:
            prefix = '          '

        print("%s%s ::= %s" % (prefix, rule[0], ' '.join(rule[1])))

    def error(self, instructions, index):
        # Find the last line boundary
        for start in range(index, -1, -1):
@@ -117,9 +144,9 @@ class PythonParser(GenericASTBuilder):
        # print >> sys.stderr, 'resolve', str(list)
        return GenericASTBuilder.resolve(self, list)

    ##############################################
    ## Common Python 2 and Python 3 grammar rules
    ##############################################
    ###############################################
    #  Common Python 2 and Python 3 grammar rules #
    ###############################################
    def p_start(self, args):
        '''
        # The start or goal symbol
@@ -138,8 +165,7 @@ class PythonParser(GenericASTBuilder):
        """
        passstmt ::=

        _stmts ::= _stmts stmt
        _stmts ::= stmt
        _stmts ::= stmt+

        # statements with continue
        c_stmts ::= _stmts
@@ -246,13 +272,11 @@ class PythonParser(GenericASTBuilder):

        # Zero or more COME_FROMs
        # loops can have this
        _come_from ::= _come_from COME_FROM
        _come_from ::=
        _come_from ::= COME_FROM*

        # Zero or one COME_FROM
        # And/or expressions have this
        come_from_opt ::= COME_FROM
        come_from_opt ::=
        come_from_opt ::= COME_FROM?
        """

    def p_dictcomp(self, args):
@@ -425,7 +449,6 @@ class PythonParser(GenericASTBuilder):
        expr ::= unary_not
        expr ::= binary_subscr
        expr ::= binary_subscr2
        expr ::= load_attr
        expr ::= get_iter
        expr ::= buildslice2
        expr ::= buildslice3
@@ -467,6 +490,8 @@ class PythonParser(GenericASTBuilder):
        _mklambda ::= load_closure mklambda
        _mklambda ::= mklambda

        # "and" where the first part of the and is true,
        # so there is only the 2nd part to evaluate
        and2 ::= _jump jmp_false COME_FROM expr COME_FROM

        expr ::= conditional
@@ -556,7 +581,7 @@ def parse(p, tokens, customize):


def get_python_parser(
        version, debug_parser={}, compile_mode='exec',
        version, debug_parser=PARSER_DEFAULT_DEBUG, compile_mode='exec',
        is_pypy = False):
    """Returns parser object for Python version 2 or 3, 3.2, 3.5on,
    etc., depending on the parameters passed.  *compile_mode* is either
@@ -710,8 +735,8 @@ def python_parser(version, co, out=sys.stdout, showasm=False,
    maybe_show_asm(showasm, tokens)

    # For heavy grammar debugging
    parser_debug = {'rules': True, 'transition': True, 'reduce' : True,
                    'showstack': 'full'}
    # parser_debug = {'rules': True, 'transition': True, 'reduce' : True,
    #                 'showstack': 'full'}
    p = get_python_parser(version, parser_debug)
    return parse(p, tokens, customize)
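The new debug_reduce() hook fires when grammar reduce tracing is
enabled through the parser_debug dictionary. A sketch of turning on
just reduce tracing, using the same entry point and flag names
test_dup_rule() above uses (everything else here is illustrative):

    import inspect
    from uncompyle6 import PYTHON_VERSION, IS_PYPY
    from uncompyle6.parser import python_parser

    # Parse this frame's own code object, printing each grammar reduction.
    python_parser(PYTHON_VERSION, inspect.currentframe().f_code,
                  is_pypy=IS_PYPY,
                  parser_debug={'dups': False, 'transition': False,
                                'reduce': True, 'rules': False,
                                'errorstack': None, 'context': True})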
@@ -9,7 +9,7 @@ e.g. 5, myvariable, "for", etc. they are CPython Bytecode tokens,
e.g. "LOAD_CONST 5", "STORE NAME myvariable", "SETUP_LOOP", etc.

If we succeed in creating a parse tree, then we have a Python program
that a later phase can tern into a sequence of ASCII text.
that a later phase can turn into a sequence of ASCII text.
"""

from __future__ import print_function

@@ -25,20 +25,18 @@ class Python2Parser(PythonParser):
        self.new_rules = set()

    def p_print2(self, args):
        '''
        """
        stmt ::= print_items_stmt
        stmt ::= print_nl
        stmt ::= print_items_nl_stmt

        print_items_stmt ::= expr PRINT_ITEM print_items_opt
        print_items_nl_stmt ::= expr PRINT_ITEM print_items_opt PRINT_NEWLINE_CONT
        print_items_opt ::= print_items
        print_items_opt ::=
        print_items ::= print_items print_item
        print_items ::= print_item
        print_item ::= expr PRINT_ITEM_CONT
        print_nl ::= PRINT_NEWLINE
        '''
        print_items_opt ::= print_items?
        print_items ::= print_item+
        print_item ::= expr PRINT_ITEM_CONT
        print_nl ::= PRINT_NEWLINE
        """

    def p_stmt2(self, args):
        """
@@ -76,8 +74,6 @@ class Python2Parser(PythonParser):
        return_if_stmts ::= _stmts return_if_stmt
        return_if_stmt ::= ret_expr RETURN_END_IF

        stmt ::= importstmt

        stmt ::= break_stmt
        break_stmt ::= BREAK_LOOP

@@ -171,8 +167,7 @@ class Python2Parser(PythonParser):
        try_middle ::= jmp_abs COME_FROM except_stmts
                       END_FINALLY

        except_stmts ::= except_stmts except_stmt
        except_stmts ::= except_stmt
        except_stmts ::= except_stmt+

        except_stmt ::= except_cond1 except_suite
        except_stmt ::= except
@@ -210,14 +205,6 @@ class Python2Parser(PythonParser):
        and ::= expr jmp_false expr come_from_opt
        or ::= expr jmp_true expr come_from_opt

        slice0 ::= expr SLICE+0
        slice0 ::= expr DUP_TOP SLICE+0
        slice1 ::= expr expr SLICE+1
        slice1 ::= expr expr DUP_TOPX_2 SLICE+1
        slice2 ::= expr expr SLICE+2
        slice2 ::= expr expr DUP_TOPX_2 SLICE+2
        slice3 ::= expr expr expr SLICE+3
        slice3 ::= expr expr expr DUP_TOPX_3 SLICE+3
        unary_convert ::= expr UNARY_CONVERT

        # In Python 3, DUP_TOPX_2 is DUP_TOP_TWO
@@ -248,11 +235,10 @@ class Python2Parser(PythonParser):
        """
        inplace_op ::= INPLACE_DIVIDE
        binary_op ::= BINARY_DIVIDE
        binary_subscr2 ::= expr expr DUP_TOPX_2 BINARY_SUBSCR
        """

    def add_custom_rules(self, tokens, customize):
        '''
        """
        Special handling for opcodes such as those that take a variable number
        of arguments -- we add a new rule for each:

@@ -271,7 +257,7 @@ class Python2Parser(PythonParser):
        expr ::= expr {expr}^n CALL_FUNCTION_KW_n POP_TOP

        PyPy adds custom rules here as well
        '''
        """
        for opname, v in list(customize.items()):
            opname_base = opname[:opname.rfind('_')]
            if opname == 'PyPy':
@@ -347,7 +333,7 @@ class Python2Parser(PythonParser):
            # always be the case.
            self.add_unique_rules([
                "stmt ::= tryfinallystmt_pypy",
                "tryfinallystmt_pypy ::= SETUP_FINALLY suite_stmts_opt COME_FROM "
                "tryfinallystmt_pypy ::= SETUP_FINALLY suite_stmts_opt COME_FROM_FINALLY "
                "suite_stmts_opt END_FINALLY"
            ], customize)
            continue
@@ -400,6 +386,26 @@ class Python2Parser(PythonParser):
            else:
                raise Exception('unknown customize token %s' % opname)
            self.add_unique_rule(rule, opname_base, v, customize)
            pass
        self.check_reduce['augassign1'] = 'AST'
        self.check_reduce['augassign2'] = 'AST'
        self.check_reduce['_stmts'] = 'AST'
        return

    def reduce_is_invalid(self, rule, ast, tokens, first, last):
        lhs = rule[0]
        if lhs in ('augassign1', 'augassign2') and ast[0][0] == 'and':
            return True
        elif lhs == '_stmts':
            for i, stmt in enumerate(ast):
                if stmt == '_stmts':
                    stmt = stmt[0]
                assert stmt == 'stmt'
                if stmt[0] == 'return_stmt':
                    return i+1 != len(ast)
                pass
            return False
        return False

class Python2ParserSingle(Python2Parser, PythonParserSingle):
    pass
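The {expr}^n notation in the add_custom_rules() docstring means "expr
repeated n times"; the method builds such rules as plain strings. A
sketch of one generated rule, with the operand values made up for
illustration (the string layout matches the rule-building code in the
parse3.py diff below):

    args_pos, args_kw = 3, 1   # hypothetical split of a CALL_FUNCTION_259 operand
    rule = ('call_function ::= expr ' +
            ('pos_arg ' * args_pos) +
            ('kwarg ' * args_kw) + 'CALL_FUNCTION_259')
    # -> "call_function ::= expr pos_arg pos_arg pos_arg kwarg CALL_FUNCTION_259"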
@@ -13,7 +13,6 @@ class Python26Parser(Python2Parser):
        super(Python26Parser, self).__init__(debug_parser)
        self.customized = {}


    def p_try_except26(self, args):
        """
        except_stmt ::= except_cond3 except_suite
@@ -84,6 +83,7 @@ class Python26Parser(Python2Parser):
        jb_cont ::= CONTINUE

        jb_cf_pop ::= JUMP_BACK come_froms POP_TOP
        jb_cf_pop ::= JUMP_BACK POP_TOP
        ja_cf_pop ::= JUMP_ABSOLUTE come_froms POP_TOP
        jf_cf_pop ::= JUMP_FORWARD come_froms POP_TOP

@@ -188,6 +188,8 @@ class Python26Parser(Python2Parser):

        comp_body ::= gen_comp_body

        for_block ::= l_stmts_opt _come_from POP_TOP JUMP_BACK

        # Make sure we keep indices the same as 2.7
        setup_loop_lf ::= SETUP_LOOP LOAD_FAST
        genexpr_func ::= setup_loop_lf FOR_ITER designator comp_iter jb_bp_come_from
@@ -243,8 +245,8 @@ if __name__ == '__main__':
        """.split()))
    remain_tokens = set(tokens) - opcode_set
    import re
    remain_tokens = set([re.sub('_\d+$','', t) for t in remain_tokens])
    remain_tokens = set([re.sub('_CONT$','', t) for t in remain_tokens])
    remain_tokens = set([re.sub('_\d+$', '', t) for t in remain_tokens])
    remain_tokens = set([re.sub('_CONT$', '', t) for t in remain_tokens])
    remain_tokens = set(remain_tokens) - opcode_set
    print(remain_tokens)
    # print(sorted(p.rule2name.items()))
@@ -31,6 +31,10 @@ class Python27Parser(Python2Parser):

    def p_try27(self, args):
        """
        tryfinallystmt ::= SETUP_FINALLY suite_stmts_opt
                           POP_BLOCK LOAD_CONST
                           COME_FROM_FINALLY suite_stmts_opt END_FINALLY

        tryelsestmt ::= SETUP_EXCEPT suite_stmts_opt POP_BLOCK
                        try_middle else_suite COME_FROM

@@ -45,7 +49,10 @@ class Python27Parser(Python2Parser):

    def p_jump27(self, args):
        """
        _ifstmts_jump ::= c_stmts_opt JUMP_FORWARD COME_FROM
        come_froms ::= come_froms COME_FROM
        come_froms ::= COME_FROM

        _ifstmts_jump ::= c_stmts_opt JUMP_FORWARD come_froms
        bp_come_from ::= POP_BLOCK COME_FROM

        # FIXME: Common with 3.0+
@@ -77,15 +84,13 @@ class Python27Parser(Python2Parser):
        assert2 ::= assert_expr jmp_true LOAD_ASSERT expr RAISE_VARARGS_2

        withstmt ::= expr SETUP_WITH POP_TOP suite_stmts_opt
                     POP_BLOCK LOAD_CONST COME_FROM
                     POP_BLOCK LOAD_CONST COME_FROM_WITH
                     WITH_CLEANUP END_FINALLY

        withasstmt ::= expr SETUP_WITH designator suite_stmts_opt
                       POP_BLOCK LOAD_CONST COME_FROM
                       POP_BLOCK LOAD_CONST COME_FROM_WITH
                       WITH_CLEANUP END_FINALLY

        while1stmt ::= SETUP_LOOP l_stmts_opt JUMP_BACK POP_BLOCK COME_FROM

        # Common with 2.6
        while1stmt ::= SETUP_LOOP return_stmts bp_come_from
        while1stmt ::= SETUP_LOOP return_stmts COME_FROM
@@ -100,8 +100,7 @@ class Python3Parser(PythonParser):
        del_stmt ::= expr DELETE_ATTR

        kwarg ::= LOAD_CONST expr
        kwargs ::= kwargs kwarg
        kwargs ::=
        kwargs ::= kwarg*

        classdef ::= build_class designator

@@ -139,16 +138,23 @@ class Python3Parser(PythonParser):
        iflaststmtl ::= testexpr c_stmts_opt JUMP_BACK
        iflaststmtl ::= testexpr c_stmts_opt JUMP_BACK COME_FROM_LOOP

        # These are used to keep AST indices the same
        jf_else ::= JUMP_FORWARD ELSE
        ja_else ::= JUMP_ABSOLUTE ELSE

        # Note: in if/else kinds of statements, we err on the side
        # of missing "else" clauses. Therefore we include grammar
        # rules with and without ELSE.

        ifelsestmt ::= testexpr c_stmts_opt JUMP_FORWARD else_suite COME_FROM
        ifelsestmt ::= testexpr c_stmts_opt jf_else else_suite _come_from

        ifelsestmtc ::= testexpr c_stmts_opt JUMP_ABSOLUTE else_suitec
        ifelsestmtc ::= testexpr c_stmts_opt ja_else else_suitec

        ifelsestmtr ::= testexpr return_if_stmts return_stmts

        ifelsestmtl ::= testexpr c_stmts_opt JUMP_BACK else_suitel
        ifelsestmtl ::= testexpr c_stmts_opt JUMP_BACK else_suitel JUMP_BACK COME_FROM_LOOP
        ifelsestmtl ::= testexpr c_stmts_opt JUMP_BACK else_suitel COME_FROM_LOOP


        # FIXME: this feels like a hack. Is it just 1 or two
        # COME_FROMs? the parsed tree for this and even with just the
@@ -246,7 +252,6 @@ class Python3Parser(PythonParser):
        c_stmts_opt34 ::= JUMP_BACK JUMP_ABSOLUTE c_stmts_opt
        """


    def p_def_annotations3(self, args):
        """
        # Annotated functions
@@ -335,11 +340,12 @@ class Python3Parser(PythonParser):
        whilestmt ::= SETUP_LOOP testexpr return_stmts POP_BLOCK
                      COME_FROM_LOOP

        while1elsestmt ::= SETUP_LOOP l_stmts JUMP_BACK
                           else_suite

        whileelsestmt ::= SETUP_LOOP testexpr l_stmts_opt JUMP_BACK POP_BLOCK
                          else_suite COME_FROM_LOOP

        while1elsestmt ::= SETUP_LOOP l_stmts JUMP_BACK
                           else_suite

        whileelselaststmt ::= SETUP_LOOP testexpr l_stmts_opt JUMP_BACK POP_BLOCK
                              else_suitec COME_FROM_LOOP
@@ -348,14 +354,13 @@ class Python3Parser(PythonParser):

        # FIXME: Python 3.? starts adding branch optimization? Put this starting there.
        while1stmt ::= SETUP_LOOP l_stmts
        while1stmt ::= SETUP_LOOP l_stmts COME_FROM_LOOP

        # FIXME: investigate - can code really produce a NOP?
        whileTruestmt ::= SETUP_LOOP l_stmts_opt JUMP_BACK NOP
                          COME_FROM_LOOP
        whileTruestmt ::= SETUP_LOOP l_stmts_opt JUMP_BACK POP_BLOCK NOP
                          COME_FROM_LOOP
        whileTruestmt ::= SETUP_LOOP l_stmts_opt JUMP_BACK POP_BLOCK NOP
                          COME_FROM_LOOP
        forstmt ::= SETUP_LOOP expr _for designator for_block POP_BLOCK NOP
                    COME_FROM_LOOP
        """
@@ -370,16 +375,17 @@ class Python3Parser(PythonParser):
        '''

    def p_expr3(self, args):
        '''
        """
        conditional ::= expr jmp_false expr jf_else expr COME_FROM
        conditionalnot ::= expr jmp_true expr jf_else expr COME_FROM


        expr ::= LOAD_CLASSNAME

        # Python 3.4+
        expr ::= LOAD_CLASSDEREF

        binary_subscr2 ::= expr expr DUP_TOP_TWO BINARY_SUBSCR
        # Python3 drops slice0..slice3

        '''
        """

    @staticmethod
    def call_fn_name(token):
@@ -442,10 +448,10 @@ class Python3Parser(PythonParser):
        args_kw = (token.attr >> 8) & 0xff
        nak = ( len(opname)-len('CALL_FUNCTION') ) // 3
        token.type = self.call_fn_name(token)
        rule = ('call_function ::= expr '
                + ('pos_arg ' * args_pos)
                + ('kwarg ' * args_kw)
                + 'expr ' * nak + token.type)
        rule = ('call_function ::= expr ' +
                ('pos_arg ' * args_pos) +
                ('kwarg ' * args_kw) +
                'expr ' * nak + token.type)
        self.add_unique_rule(rule, token.type, args_pos, customize)
        rule = ('classdefdeco2 ::= LOAD_BUILD_CLASS mkfunc %s%s_%d'
                % (('expr ' * (args_pos-1)), opname, args_pos))
@@ -584,9 +590,10 @@ class Python3Parser(PythonParser):
            self.add_unique_rule(rule, 'kvlist_n', 1, customize)
            rule = "mapexpr ::= BUILD_MAP_n kvlist_n"
        elif self.version >= 3.5:
            rule = kvlist_n + ' ::= ' + 'expr ' * (token.attr*2)
            self.add_unique_rule(rule, opname, token.attr, customize)
            rule = "mapexpr ::= %s %s" % (kvlist_n, opname)
            if opname != 'BUILD_MAP_WITH_CALL':
                rule = kvlist_n + ' ::= ' + 'expr ' * (token.attr*2)
                self.add_unique_rule(rule, opname, token.attr, customize)
                rule = "mapexpr ::= %s %s" % (kvlist_n, opname)
        else:
            rule = kvlist_n + ' ::= ' + 'expr expr STORE_MAP ' * token.attr
            self.add_unique_rule(rule, opname, token.attr, customize)
@@ -637,10 +644,10 @@ class Python3Parser(PythonParser):

            # number of apply equiv arguments:
            nak = ( len(opname_base)-len('CALL_METHOD') ) // 3
            rule = ('call_function ::= expr '
                    + ('pos_arg ' * args_pos)
                    + ('kwarg ' * args_kw)
                    + 'expr ' * nak + opname)
            rule = ('call_function ::= expr ' +
                    ('pos_arg ' * args_pos) +
                    ('kwarg ' * args_kw) +
                    'expr ' * nak + opname)
            self.add_unique_rule(rule, opname, token.attr, customize)
        elif opname.startswith('MAKE_CLOSURE'):
            # DRY with MAKE_FUNCTION
@@ -684,8 +691,30 @@ class Python3Parser(PythonParser):
            rule = ('mkfunc ::= %sload_closure LOAD_CONST %s'
                    % ('expr ' * args_pos, opname))
            self.add_unique_rule(rule, opname, token.attr, customize)
            pass
        self.check_reduce['augassign1'] = 'AST'
        self.check_reduce['augassign2'] = 'AST'
        self.check_reduce['while1stmt'] = 'noAST'
        return

    def reduce_is_invalid(self, rule, ast, tokens, first, last):
        lhs = rule[0]
        if lhs in ('augassign1', 'augassign2') and ast[0][0] == 'and':
            return True
        elif lhs == 'while1stmt':
            if tokens[last] in ('COME_FROM_LOOP', 'JUMP_BACK'):
                # jump_back should be right afer SETUP_LOOP. Test?
                last += 1
            while last < len(tokens) and isinstance(tokens[last].offset, str):
                last += 1
            if last < len(tokens):
                offset = tokens[last].offset
                assert tokens[first] == 'SETUP_LOOP'
                if offset != tokens[first].attr:
                    return True
            return False
        return False

class Python30Parser(Python3Parser):

    def p_30(self, args):
@@ -10,9 +10,6 @@ from uncompyle6.parsers.parse3 import Python3Parser
class Python32Parser(Python3Parser):
    def p_32to35(self, args):
        """
        # In Python 3.2+, DUP_TOPX is DUP_TOP_TWO
        binary_subscr2 ::= expr expr DUP_TOP_TWO BINARY_SUBSCR

        # Store locals is only in Python 3.0 to 3.3
        stmt ::= store_locals
        store_locals ::= LOAD_FAST STORE_LOCALS
@@ -19,7 +19,6 @@ class Python33Parser(Python32Parser):
        # actions that want c_stmts_opt at index 1

        iflaststmt ::= testexpr c_stmts_opt33
        iflaststmtl ::= testexpr c_stmts_opt
        c_stmts_opt33 ::= JUMP_BACK JUMP_ABSOLUTE c_stmts_opt
        _ifstmts_jump ::= c_stmts_opt JUMP_FORWARD _come_from
@@ -17,8 +17,6 @@ class Python34Parser(Python33Parser):
        """
        # Python 3.4+ optimizes the trailing two JUMPS away

        for_block ::= l_stmts

        # Is this 3.4 only?
        yield_from ::= expr GET_ITER LOAD_CONST YIELD_FROM
        """
@@ -42,8 +40,8 @@ if __name__ == '__main__':
        """.split()))
    remain_tokens = set(tokens) - opcode_set
    import re
    remain_tokens = set([re.sub('_\d+$','', t) for t in remain_tokens])
    remain_tokens = set([re.sub('_CONT$','', t) for t in remain_tokens])
    remain_tokens = set([re.sub('_\d+$', '', t) for t in remain_tokens])
    remain_tokens = set([re.sub('_CONT$', '', t) for t in remain_tokens])
    remain_tokens = set(remain_tokens) - opcode_set
    print(remain_tokens)
    # print(sorted(p.rule2name.items()))
@@ -45,14 +45,28 @@ class Python35Parser(Python34Parser):
        # Python 3.3+ also has yield from. 3.5 does it
        # differently than 3.3, 3.4

        expr ::= yield_from
        yield_from ::= expr GET_YIELD_FROM_ITER LOAD_CONST YIELD_FROM

        # Python 3.4+ has more loop optimization that removes
        # JUMP_FORWARD in some cases, and hence we also don't
        # see COME_FROM
        _ifstmts_jump ::= c_stmts_opt
        """

    def add_custom_rules(self, tokens, customize):
        super(Python35Parser, self).add_custom_rules(tokens, customize)
        for i, token in enumerate(tokens):
            opname = token.type
            if opname == 'BUILD_MAP_UNPACK_WITH_CALL':
                nargs = token.attr % 256
                map_unpack_n = "map_unpack_%s" % nargs
                rule = map_unpack_n + ' ::= ' + 'expr ' * (nargs)
                self.add_unique_rule(rule, opname, token.attr, customize)
                rule = "unmapexpr ::= %s %s" % (map_unpack_n, opname)
                self.add_unique_rule(rule, opname, token.attr, customize)
                call_token = tokens[i+1]
                if self.version == 3.5:
                    rule = 'call_function ::= expr unmapexpr ' + call_token.type
                    self.add_unique_rule(rule, opname, token.attr, customize)
                    pass
                pass
        return

class Python35ParserSingle(Python35Parser, PythonParserSingle):
    pass

@@ -72,8 +86,8 @@ if __name__ == '__main__':
        """.split()))
    remain_tokens = set(tokens) - opcode_set
    import re
    remain_tokens = set([re.sub('_\d+$','', t) for t in remain_tokens])
    remain_tokens = set([re.sub('_CONT$','', t) for t in remain_tokens])
    remain_tokens = set([re.sub('_\d+$', '', t) for t in remain_tokens])
    remain_tokens = set([re.sub('_CONT$', '', t) for t in remain_tokens])
    remain_tokens = set(remain_tokens) - opcode_set
    print(remain_tokens)
    # print(sorted(p.rule2name.items()))
@@ -17,8 +17,10 @@ class Python36Parser(Python35Parser):
    def p_36misc(self, args):
        """
        fstring_multi ::= fstring_expr_or_strs BUILD_STRING
        fstring_expr_or_strs ::= fstring_expr_or_strs fstring_expr_or_str
        fstring_expr_or_strs ::= fstring_expr_or_str
        fstring_expr_or_strs ::= fstring_expr_or_str+

        func_args36 ::= expr BUILD_TUPLE_0
        call_function ::= func_args36 unmapexpr CALL_FUNCTION_EX
        """

    def add_custom_rules(self, tokens, customize):
@@ -47,6 +49,7 @@ class Python36Parser(Python35Parser):
            """ % (fstring_expr_or_str_n, fstring_expr_or_str_n, "fstring_expr_or_str " * v)
            self.add_unique_doc_rules(rules_str, customize)


class Python36ParserSingle(Python36Parser, PythonParserSingle):
    pass
@@ -27,7 +27,8 @@ if PYTHON3:
    intern = sys.intern
    L65536 = 65536

    def long(l): l
    def long(l):
        return l
else:
    L65536 = long(65536)  # NOQA

@@ -227,7 +228,7 @@ class Scanner(object):
        if op < self.opc.HAVE_ARGUMENT:
            return 1
        else:
            return 3
            return 2 if self.version >= 3.6 else 3

    def remove_mid_line_ifs(self, ifs):
        """
@@ -93,11 +93,6 @@ class Scanner2(scan.Scanner):
            for instr in bytecode.get_instructions(co):
                print(instr._disassemble())

        # from xdis.bytecode import Bytecode
        # bytecode = Bytecode(co, self.opc)
        # for instr in bytecode.get_instructions(co):
        #     print(instr._disassemble())

        # Container for tokens
        tokens = []

@@ -165,15 +160,26 @@ class Scanner2(scan.Scanner):
                # we sort them). That way, specific COME_FROM tags will match up
                # properly. For example, a "loop" with an "if" nested in it should have the
                # "loop" tag last so the grammar rule matches that properly.
                # last_offset = -1
                for jump_offset in sorted(jump_targets[offset], reverse=True):
                    # if jump_offset == last_offset:
                    #     continue
                    # last_offset = jump_offset
                    come_from_name = 'COME_FROM'
                    op_name = self.opc.opname[self.code[jump_offset]]
                    if op_name.startswith('SETUP_') and self.version == 2.7:
                        come_from_type = op_name[len('SETUP_'):]
                        if come_from_type not in ('LOOP', 'EXCEPT'):
                            come_from_name = 'COME_FROM_%s' % come_from_type
                        pass
                    tokens.append(Token(
                        'COME_FROM', None, repr(jump_offset),
                        come_from_name, None, repr(jump_offset),
                        offset="%s_%d" % (offset, jump_idx),
                        has_arg = True))
                    jump_idx += 1

            op = self.code[offset]
            opname = self.opc.opname[op]
            op_name = self.opc.opname[op]

            oparg = None; pattr = None
            has_arg = op_has_argument(op, self.opc)
@@ -188,14 +194,14 @@ class Scanner2(scan.Scanner):
                if iscode(const):
                    oparg = const
                    if const.co_name == '<lambda>':
                        assert opname == 'LOAD_CONST'
                        opname = 'LOAD_LAMBDA'
                        assert op_name == 'LOAD_CONST'
                        op_name = 'LOAD_LAMBDA'
                    elif const.co_name == '<genexpr>':
                        opname = 'LOAD_GENEXPR'
                        op_name = 'LOAD_GENEXPR'
                    elif const.co_name == '<dictcomp>':
                        opname = 'LOAD_DICTCOMP'
                        op_name = 'LOAD_DICTCOMP'
                    elif const.co_name == '<setcomp>':
                        opname = 'LOAD_SETCOMP'
                        op_name = 'LOAD_SETCOMP'
                    # verify() uses 'pattr' for comparison, since 'attr'
                    # now holds Code(const) and thus can not be used
                    # for comparison (todo: think about changing this)
@@ -231,20 +237,20 @@ class Scanner2(scan.Scanner):
                       self.code[self.prev[offset]] == self.opc.LOAD_CLOSURE:
                        continue
                else:
                    if self.is_pypy and not oparg and opname == 'BUILD_MAP':
                        opname = 'BUILD_MAP_n'
                    if self.is_pypy and not oparg and op_name == 'BUILD_MAP':
                        op_name = 'BUILD_MAP_n'
                    else:
                        opname = '%s_%d' % (opname, oparg)
                        op_name = '%s_%d' % (op_name, oparg)
                    if op != self.opc.BUILD_SLICE:
                        customize[opname] = oparg
            elif self.is_pypy and opname in ('LOOKUP_METHOD',
                        customize[op_name] = oparg
            elif self.is_pypy and op_name in ('LOOKUP_METHOD',
                                              'JUMP_IF_NOT_DEBUG',
                                              'SETUP_EXCEPT',
                                              'SETUP_FINALLY'):
                # The value in the dict is in special cases in semantic actions, such
                # as CALL_FUNCTION. The value is not used in these cases, so we put
                # in arbitrary value 0.
                customize[opname] = 0
                customize[op_name] = 0
            elif op == self.opc.JUMP_ABSOLUTE:
                # Further classify JUMP_ABSOLUTE into backward jumps
                # which are used in loops, and "CONTINUE" jumps which
@@ -263,16 +269,16 @@ class Scanner2(scan.Scanner):
                        and self.code[offset+3] not in (self.opc.END_FINALLY,
                                                        self.opc.POP_BLOCK)
                        and offset not in self.not_continue):
                    opname = 'CONTINUE'
                    op_name = 'CONTINUE'
                else:
                    opname = 'JUMP_BACK'
                    op_name = 'JUMP_BACK'

            elif op == self.opc.LOAD_GLOBAL:
                if offset in self.load_asserts:
                    opname = 'LOAD_ASSERT'
                    op_name = 'LOAD_ASSERT'
            elif op == self.opc.RETURN_VALUE:
                if offset in self.return_end_ifs:
                    opname = 'RETURN_END_IF'
                    op_name = 'RETURN_END_IF'

            if offset in self.linestartoffsets:
                linestart = self.linestartoffsets[offset]
@@ -281,7 +287,7 @@ class Scanner2(scan.Scanner):

            if offset not in replace:
                tokens.append(Token(
                    opname, oparg, pattr, offset, linestart, op,
                    op_name, oparg, pattr, offset, linestart, op,
                    has_arg, self.opc))
            else:
                tokens.append(Token(
@@ -444,10 +450,16 @@ class Scanner2(scan.Scanner):
        if self.version < 2.7 and self.code[jmp] in self.jump_forward:
            self.not_continue.add(jmp)
            jmp = self.get_target(jmp)
            prev_offset = self.prev[except_match]
            # COMPARE_OP argument should be "exception match" or 10
            if (self.code[prev_offset] == self.opc.COMPARE_OP and
                self.code[prev_offset+1] != 10):
                return None
            if jmp not in self.pop_jump_if | self.jump_forward:
                self.ignore_if.add(except_match)
                return None


        self.ignore_if.add(except_match)
        self.not_continue.add(jmp)
        return jmp
@@ -770,21 +782,23 @@ class Scanner2(scan.Scanner):
        if offset in self.ignore_if:
            return

        if code[pre[rtarget]] == self.opc.JUMP_ABSOLUTE and pre[rtarget] in self.stmts \
           and pre[rtarget] != offset and pre[pre[rtarget]] != offset:
            if code[rtarget] == self.opc.JUMP_ABSOLUTE and code[rtarget+3] == self.opc.POP_BLOCK:
                if code[pre[pre[rtarget]]] != self.opc.JUMP_ABSOLUTE:
                    pass
                elif self.get_target(pre[pre[rtarget]]) != target:
                    pass
        if self.version == 2.7:
            if code[pre[rtarget]] == self.opc.JUMP_ABSOLUTE and pre[rtarget] in self.stmts \
               and pre[rtarget] != offset and pre[pre[rtarget]] != offset:
                if code[rtarget] == self.opc.JUMP_ABSOLUTE and code[rtarget+3] == self.opc.POP_BLOCK:
                    if code[pre[pre[rtarget]]] != self.opc.JUMP_ABSOLUTE:
                        pass
                    elif self.get_target(pre[pre[rtarget]]) != target:
                        pass
                    else:
                        rtarget = pre[rtarget]
                else:
                    rtarget = pre[rtarget]
            else:
                rtarget = pre[rtarget]

        # Does the "if" jump just beyond a jump op, then this is probably an if statement
        pre_rtarget = pre[rtarget]
        code_pre_rtarget = code[pre_rtarget]

        if code_pre_rtarget in self.jump_forward:
            if_end = self.get_target(pre_rtarget)

@@ -812,6 +826,7 @@ class Scanner2(scan.Scanner):
                self.structs.append({'type': 'if-then',
                                     'start': start-3,
                                     'end': pre_rtarget})

            self.not_continue.add(pre_rtarget)

            if rtarget < end:
@@ -887,7 +902,6 @@ class Scanner2(scan.Scanner):
                pass
            pass


        # FIXME: All the < 2.7 conditions are is horrible. We need a better way.
        if label is not None and label != -1:
            # In Python < 2.7, the POP_TOP in:
@@ -25,5 +25,5 @@ class Scanner23(scan.Scanner24):
        # These are the only differences in initialization between
        # 2.3-2.6
        self.version = 2.3
        self.genexpr_name = '<generator expression>';
        self.genexpr_name = '<generator expression>'
        return
@@ -25,5 +25,5 @@ class Scanner24(scan.Scanner25):
        self.opc = opcode_24
        self.opname = opcode_24.opname
        self.version = 2.4
        self.genexpr_name = '<generator expression>';
        self.genexpr_name = '<generator expression>'
        return
@@ -233,7 +233,7 @@ class Scanner26(scan.Scanner2):
                    if op != self.opc.BUILD_SLICE:
                        customize[op_name] = oparg
            elif op == self.opc.JUMP_ABSOLUTE:
                # Further classifhy JUMP_ABSOLUTE into backward jumps
                # Further classify JUMP_ABSOLUTE into backward jumps
                # which are used in loops, and "CONTINUE" jumps which
                # may appear in a "continue" statement. The loop-type
                # and continue-type jumps will help us classify loop
@@ -254,6 +254,9 @@ class Scanner26(scan.Scanner2):
                    #    if x: continue
                    # the "continue" is not on a new line.
                    if tokens[-1].type == 'JUMP_BACK':
                        # We need 'intern' since we have
                        # already processed the previous
                        # token.
                        tokens[-1].type = intern('CONTINUE')

            elif op == self.opc.LOAD_GLOBAL:
@@ -128,7 +128,6 @@ class Scanner3(Scanner):
            varargs_ops.add(self.opc.CALL_METHOD)
        self.varargs_ops = frozenset(varargs_ops)


    def opName(self, offset):
        return self.opc.opname[self.code[offset]]

@@ -226,6 +225,14 @@ class Scanner3(Scanner):
                    jump_idx += 1
                    pass
                pass
            elif inst.offset in self.else_start:
                end_offset = self.else_start[inst.offset]
                tokens.append(Token('ELSE',
                                    None, repr(end_offset),
                                    offset='%s' % (inst.offset),
                                    has_arg = True, opc=self.opc))

                pass

            pattr = inst.argrepr
            opname = inst.opname
@@ -324,9 +331,10 @@ class Scanner3(Scanner):
                    # FIXME: this is a hack to catch stuff like:
                    #    if x: continue
                    # the "continue" is not on a new line.
                    # There are other situations were we don't catch
                    # There are other situations where we don't catch
                    # CONTINUE as well.
                    if tokens[-1].type == 'JUMP_BACK':
                    if tokens[-1].type == 'JUMP_BACK' and tokens[-1].attr <= argval:
                        # intern is used because we are changing the *previous* token
                        tokens[-1].type = intern('CONTINUE')

            elif op == self.opc.RETURN_VALUE:
@@ -424,6 +432,7 @@ class Scanner3(Scanner):
        self.fixed_jumps = {}
        self.ignore_if = set()
        self.build_statement_indices()
        self.else_start = {}

        # Containers filled by detect_structure()
        self.not_continue = set()
@@ -553,9 +562,15 @@ class Scanner3(Scanner):
        Get target offset for op located at given <offset>.
        """
        op = self.code[offset]
        target = self.code[offset+1] + self.code[offset+2] * 256
        if op in op3.hasjrel:
            target += offset + 3
        if self.version >= 3.6:
            target = self.code[offset+1]
            if op in op3.hasjrel:
                target += offset + 2
        else:
            target = self.code[offset+1] + self.code[offset+2] * 256
            if op in op3.hasjrel:
                target += offset + 3

        return target

    def detect_structure(self, offset, targets):
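The get_target() change above tracks CPython 3.6's switch from
variable-length bytecode to fixed two-byte "wordcode": every
instruction is now an opcode byte plus one operand byte, so a relative
jump's target becomes operand + offset + 2 instead of the old
lo + hi*256 + offset + 3. A hand-worked illustration (not project code):

    # 3.5 and earlier: JUMP_FORWARD at offset 10 with operand bytes 05 00
    #   target = 5 + 0*256 + 10 + 3 = 18
    # 3.6+ wordcode:    JUMP_FORWARD at offset 10 with operand byte 05
    #   target = 5 + 10 + 2 = 17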
@@ -758,15 +773,28 @@ class Scanner3(Scanner):
              code[prev_op[prev_op[rtarget]]] != self.opc.JUMP_ABSOLUTE)):
            rtarget = prev_op[rtarget]

        # Does the "if" jump just beyond a jump op, then this can be
        # a block inside an "if" statement
        # Does the "jump if" jump beyond a jump op?
        # That is, we have something like:
        #   POP_JUMP_IF_FALSE HERE
        #   ...
        #   JUMP_FORWARD
        # HERE:
        #
        # If so, this can be block inside an "if" statement
        # or a conditional assignment like:
        #   x = 1 if x else 2
        #
        # There are other contexts we may need to consider
        # like whether the target is "END_FINALLY"
        # or if the condition jump is to a forward location
        if self.is_jump_forward(prev_op[rtarget]):
            if_end = self.get_target(prev_op[rtarget])
            rrtarget = prev_op[rtarget]
            if_end = self.get_target(rrtarget)

            # Is this a loop and not an "if" statement?
            if ((if_end < prev_op[rtarget]) and
            # If the jump target is back, we are looping
            if (if_end < rrtarget and
                (code[prev_op[if_end]] == self.opc.SETUP_LOOP)):
                if(if_end > start):
                if (if_end > start):
                    return

            end = self.restrict_to_parent(if_end, parent)
@@ -776,10 +804,15 @@ class Scanner3(Scanner):
                                 'end': prev_op[rtarget]})
            self.not_continue.add(prev_op[rtarget])

            if rtarget < end:
                self.structs.append({'type': 'if-else',
            if rtarget < end and (
                    code[rtarget] not in (self.opc.END_FINALLY,
                                          self.opc.JUMP_ABSOLUTE) and
                    code[prev_op[rrtarget]] not in (self.opc.POP_EXCEPT,
                                                    self.opc.END_FINALLY)):
                self.structs.append({'type': 'else',
                                     'start': rtarget,
                                     'end': end})
                self.else_start[rtarget] = end
            elif code[prev_op[rtarget]] == self.opc.RETURN_VALUE:
                self.structs.append({'type': 'if-then',
                                     'start': start,
@@ -869,7 +902,9 @@ class Scanner3(Scanner):
            op = self.code[i]
            if op == self.opc.END_FINALLY:
                if count_END_FINALLY == count_SETUP_:
                    assert self.code[self.prev_op[i]] in (JUMP_ABSOLUTE, JUMP_FORWARD, RETURN_VALUE)
                    assert self.code[self.prev_op[i]] in (JUMP_ABSOLUTE,
                                                          JUMP_FORWARD,
                                                          RETURN_VALUE)
                    self.not_continue.add(self.prev_op[i])
                    return self.prev_op[i]
                count_END_FINALLY += 1
@@ -29,7 +29,7 @@ class Token:
        self.pattr = pattr
        self.offset = offset
        self.linestart = linestart
        if has_arg == False:
        if has_arg is False:
            self.attr = None
            self.pattr = None
        self.opc = opc
@@ -61,6 +61,7 @@ from uncompyle6.semantics import pysource
from uncompyle6 import parser
from uncompyle6.scanner import Token, Code, get_scanner
from uncompyle6.semantics.check_ast import checker
from uncompyle6.semantics.helper import print_docstring

from uncompyle6.show import (
    maybe_show_asm,

@@ -582,7 +583,7 @@ class FragmentsWalker(pysource.SourceWalker, object):
        self.set_pos_info(node[-3], start, len(self.f.getvalue()))
        start = len(self.f.getvalue())
        self.preorder(ast[iter_index])
        self.set_pos_info(iter_index, start, len(self.f.getvalue()))
        self.set_pos_info(ast[iter_index], start, len(self.f.getvalue()))
        self.prec = p

    def comprehension_walk3(self, node, iter_index, code_index=-5):
@@ -1687,7 +1688,7 @@ class FragmentsWalker(pysource.SourceWalker, object):

        if len(code.co_consts)>0 and code.co_consts[0] is not None and not isLambda: # ugly
            # docstring exists, dump it
            self.print_docstring(indent, code.co_consts[0])
            print_docstring(self, indent, code.co_consts[0])

        code._tokens = None  # save memory
        assert ast == 'stmts'
uncompyle6/semantics/helper.py (new file, 135 lines)
@@ -0,0 +1,135 @@
import sys
from uncompyle6 import PYTHON3

if PYTHON3:
    minint = -sys.maxsize-1
    maxint = sys.maxsize
else:
    minint = -sys.maxint-1
    maxint = sys.maxint

def print_docstring(self, indent, docstring):
    try:
        if docstring.find('"""') == -1:
            quote = '"""'
        else:
            quote = "'''"
    except:
        return False
    self.write(indent)
    if not PYTHON3 and not isinstance(docstring, str):
        # Must be unicode in Python2
        self.write('u')
        docstring = repr(docstring.expandtabs())[2:-1]
    else:
        docstring = repr(docstring.expandtabs())[1:-1]

    for (orig, replace) in (('\\\\', '\t'),
                            ('\\r\\n', '\n'),
                            ('\\n', '\n'),
                            ('\\r', '\n'),
                            ('\\"', '"'),
                            ("\\'", "'")):
        docstring = docstring.replace(orig, replace)

    # Do a raw string if there are backslashes but no other escaped characters:
    # also check some edge cases
    if ('\t' in docstring
        and '\\' not in docstring
        and len(docstring) >= 2
        and docstring[-1] != '\t'
        and (docstring[-1] != '"'
             or docstring[-2] == '\t')):
        self.write('r') # raw string
        # restore backslashes unescaped since raw
        docstring = docstring.replace('\t', '\\')
    else:
        # Escape '"' if it's the last character, so it doesn't
        # ruin the ending triple quote
        if len(docstring) and docstring[-1] == '"':
            docstring = docstring[:-1] + '\\"'
        # Restore escaped backslashes
        docstring = docstring.replace('\t', '\\\\')
    # Escape triple quote when needed
    if quote == '""""':
        docstring = docstring.replace('"""', '\\"\\"\\"')
    lines = docstring.split('\n')
    calculate_indent = maxint
    for line in lines[1:]:
        stripped = line.lstrip()
        if len(stripped) > 0:
            calculate_indent = min(calculate_indent, len(line) - len(stripped))
    calculate_indent = min(calculate_indent, len(lines[-1]) - len(lines[-1].lstrip()))
    # Remove indentation (first line is special):
    trimmed = [lines[0]]
    if calculate_indent < maxint:
        trimmed += [line[calculate_indent:] for line in lines[1:]]

    self.write(quote)
    if len(trimmed) == 0:
        self.println(quote)
    elif len(trimmed) == 1:
        self.println(trimmed[0], quote)
    else:
        self.println(trimmed[0])
        for line in trimmed[1:-1]:
            self.println( indent, line )
        self.println(indent, trimmed[-1], quote)
    return True

# if __name__ == '__main__':
#     if PYTHON3:
#         from io import StringIO
#     else:
#         from StringIO import StringIO
#     class PrintFake():
#         def __init__(self):
#             self.pending_newlines = 0
#             self.f = StringIO()

#         def write(self, *data):
#             if (len(data) == 0) or (len(data) == 1 and data[0] == ''):
#                 return
#             out = ''.join((str(j) for j in data))
#             n = 0
#             for i in out:
#                 if i == '\n':
#                     n += 1
#                     if n == len(out):
#                         self.pending_newlines = max(self.pending_newlines, n)
#                         return
#                 elif n:
#                     self.pending_newlines = max(self.pending_newlines, n)
#                     out = out[n:]
#                     break
#                 else:
#                     break

#             if self.pending_newlines > 0:
#                 self.f.write('\n'*self.pending_newlines)
#                 self.pending_newlines = 0

#             for i in out[::-1]:
#                 if i == '\n':
#                     self.pending_newlines += 1
#                 else:
#                     break

#             if self.pending_newlines:
#                 out = out[:-self.pending_newlines]
#             self.f.write(out)
#         def println(self, *data):
#             if data and not(len(data) == 1 and data[0] ==''):
#                 self.write(*data)
#             self.pending_newlines = max(self.pending_newlines, 1)
#             return
#         pass

#     for doc in (
#             "Now is the time",
#             r'''func placeholder - with ("""\nstring\n""")''',
#             r'''func placeholder - ' and with ("""\nstring\n""")''',
#             r"""func placeholder - ' and with ('''\nstring\n''') and \"\"\"\nstring\n\"\"\" """
#             ):
#         o = PrintFake()
#         print_docstring(o, ' ', doc)
#         print(o.f.getvalue())
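
One subtle point in print_docstring above deserves a note: the ('\\\\', '\t') entry in the replacement table stashes escaped backslashes as tab characters. That is only safe because expandtabs() has already eliminated every real tab, which is why the function can later turn the tabs back into backslashes, in raw or escaped form. A minimal standalone illustration of the same trick:

s = repr('a\\b\n'.expandtabs())[1:-1]  # the escaped source text: a\\b\n
s = s.replace('\\\\', '\t')            # stash the escaped backslash as a tab
s = s.replace('\\n', '\n')             # now '\n' can be unescaped safely
s = s.replace('\t', '\\\\')            # restore the backslash in escaped form
print(s)                               # prints a\\b, then a real newline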
@@ -8,6 +8,7 @@ from uncompyle6.scanner import Code
 from uncompyle6.parsers.astnode import AST
 from uncompyle6 import PYTHON3
 from uncompyle6.semantics.parser_error import ParserError
+from uncompyle6.semantics.helper import print_docstring

 if PYTHON3:
     from itertools import zip_longest
@@ -37,7 +38,7 @@ def find_globals(node, globs):
 def find_none(node):
     for n in node:
         if isinstance(n, AST):
-            if not n in ('return_stmt', 'return_if_stmt'):
+            if n not in ('return_stmt', 'return_if_stmt'):
                 if find_none(n):
                     return True
         elif n.type == 'LOAD_CONST' and n.pattr is None:
@@ -218,7 +219,7 @@ def make_function3_annotate(self, node, isLambda, nested=1,
    if (len(code.co_consts) > 0 and
        code.co_consts[0] is not None and not isLambda): # ugly
        # docstring exists, dump it
-       self.print_docstring(indent, code.co_consts[0])
+       print_docstring(self, indent, code.co_consts[0])

    code._tokens = None # save memory
    assert ast == 'stmts'
@@ -353,7 +354,7 @@ def make_function2(self, node, isLambda, nested=1, codeNode=None):

    if len(code.co_consts) > 0 and code.co_consts[0] is not None and not isLambda: # ugly
        # docstring exists, dump it
-       self.print_docstring(indent, code.co_consts[0])
+       print_docstring(self, indent, code.co_consts[0])

    code._tokens = None # save memory
    assert ast == 'stmts'
@@ -542,7 +543,7 @@ def make_function3(self, node, isLambda, nested=1, codeNode=None):

    if len(code.co_consts) > 0 and code.co_consts[0] is not None and not isLambda: # ugly
        # docstring exists, dump it
-       self.print_docstring(self.indent, code.co_consts[0])
+       print_docstring(self, self.indent, code.co_consts[0])

    code._tokens = None # save memory
    assert ast == 'stmts'
@@ -83,6 +83,7 @@ from uncompyle6.semantics.make_function import (
    make_function2, make_function3, make_function3_annotate, find_globals)
 from uncompyle6.semantics.parser_error import ParserError
 from uncompyle6.semantics.check_ast import checker
+from uncompyle6.semantics.helper import print_docstring

 from uncompyle6.show import (
    maybe_show_ast,
@@ -351,7 +352,7 @@ MAP_R = (TABLE_R, -1)

 MAP = {
    'stmt': MAP_R,
-   'call_function': MAP_R,
+   'call_function': MAP_R,
    'del_stmt': MAP_R,
    'designator': MAP_R,
    'exprlist': MAP_R0,
@@ -486,6 +487,12 @@ class SourceWalker(GenericASTTraversal, object):

        return

+   def indent_if_source_nl(self, line_number, indent):
+       if (line_number != self.line_number):
+           self.write("\n" + self.indent + INDENT_PER_LEVEL[:-1])
+       return self.line_number
+
+
    def customize_for_version(self, is_pypy, version):
        if is_pypy:
            ########################
@@ -623,6 +630,19 @@ class SourceWalker(GenericASTTraversal, object):
            TABLE_DIRECT.update({
                'LOAD_CLASSDEREF': ( '%{pattr}', ),
            })
+       if version >= 3.5:
+           def n_unmapexpr(node):
+               last_n = node[0][-1]
+               for n in node[0]:
+                   self.preorder(n)
+                   if n != last_n:
+                       self.f.write(', **')
+                       pass
+                   pass
+               self.prune()
+               pass
+           self.n_unmapexpr = n_unmapexpr
+
        if version >= 3.6:
            ########################
            # Python 3.6+ Additions
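
The n_unmapexpr handler added above renders the Python 3.5 map-unpacking nodes (the BUILD_MAP_UNPACK family behind constructs like {**a, **b}) by joining the children with ', **'. Here is a standalone sketch of just that join logic, with plain strings standing in for what self.preorder does on subtree nodes:

import sys

def render_unmap(children, write):
    # Join children with ', **', as n_unmapexpr does while walking node[0]
    last = children[-1]
    for child in children:
        write(child)
        if child is not last:
            write(', **')

render_unmap(['a', 'b', 'c'], sys.stdout.write)
print()   # output: a, **b, **c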
@@ -634,6 +654,7 @@ class SourceWalker(GenericASTTraversal, object):
            })

            FSTRING_CONVERSION_MAP = {1: '!s', 2: '!r', 3: '!a'}
+
            def f_conversion(node):
                node.conversion = FSTRING_CONVERSION_MAP.get(node.data[1].attr, '')
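
FSTRING_CONVERSION_MAP above decodes the conversion flag of Python 3.6's FORMAT_VALUE opcode back into f-string conversion syntax. For reference, the three conversions behave like this under CPython 3.6+:

x = 'café'
print(f'{x!s}')   # café        (str)
print(f'{x!r}')   # 'café'      (repr)
print(f'{x!a}')   # 'caf\xe9'   (ascii)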
@@ -645,7 +666,6 @@ class SourceWalker(GenericASTTraversal, object):
            def n_fstring_single(node):
                f_conversion(node)
                self.default(node)
-
            self.n_fstring_single = n_fstring_single

        return
@@ -671,9 +691,8 @@ class SourceWalker(GenericASTTraversal, object):
                          None)

    def set_pos_info(self, node):
-       if hasattr(node, 'offset'):
-           if node.offset in self.linestarts:
-               self.line_number = self.linestarts[node.offset]
+       if hasattr(node, 'linestart') and node.linestart:
+           self.line_number = node.linestart

    def preorder(self, node=None):
        super(SourceWalker, self).preorder(node)
@@ -740,73 +759,6 @@ class SourceWalker(GenericASTTraversal, object):
        self.write(*data)
        self.pending_newlines = max(self.pending_newlines, 1)

-   def print_docstring(self, indent, docstring):
-       ## FIXME: put this into a testable function.
-       if docstring.find('"""') == -1:
-           quote = '"""'
-       else:
-           quote = "'''"
-
-       self.write(indent)
-       if not PYTHON3 and not isinstance(docstring, str):
-           # Must be unicode in Python2
-           self.write('u')
-           docstring = repr(docstring.expandtabs())[2:-1]
-       else:
-           docstring = repr(docstring.expandtabs())[1:-1]
-
-       for (orig, replace) in (('\\\\', '\t'),
-                               ('\\r\\n', '\n'),
-                               ('\\n', '\n'),
-                               ('\\r', '\n'),
-                               ('\\"', '"'),
-                               ("\\'", "'")):
-           docstring = docstring.replace(orig, replace)
-
-       # Do a raw string if there are backslashes but no other escaped characters:
-       # also check some edge cases
-       if ('\t' in docstring
-           and '\\' not in docstring
-           and len(docstring) >= 2
-           and docstring[-1] != '\t'
-           and (docstring[-1] != '"'
-                or docstring[-2] == '\t')):
-           self.write('r') # raw string
-           # restore backslashes unescaped since raw
-           docstring = docstring.replace('\t', '\\')
-       else:
-           # Escape '"' if it's the last character, so it doesn't
-           # ruin the ending triple quote
-           if len(docstring) and docstring[-1] == '"':
-               docstring = docstring[:-1] + '\\"'
-           # Restore escaped backslashes
-           docstring = docstring.replace('\t', '\\\\')
-       # Escape triple quote when needed
-       if quote == '""""':
-           docstring = docstring.replace('"""', '\\"\\"\\"')
-       lines = docstring.split('\n')
-       calculate_indent = maxint
-       for line in lines[1:]:
-           stripped = line.lstrip()
-           if len(stripped) > 0:
-               calculate_indent = min(calculate_indent, len(line) - len(stripped))
-       calculate_indent = min(calculate_indent, len(lines[-1]) - len(lines[-1].lstrip()))
-       # Remove indentation (first line is special):
-       trimmed = [lines[0]]
-       if calculate_indent < maxint:
-           trimmed += [line[calculate_indent:] for line in lines[1:]]
-
-       self.write(quote)
-       if len(trimmed) == 0:
-           self.println(quote)
-       elif len(trimmed) == 1:
-           self.println(trimmed[0], quote)
-       else:
-           self.println(trimmed[0])
-           for line in trimmed[1:-1]:
-               self.println( indent, line )
-           self.println(indent, trimmed[-1], quote)
-
    def is_return_none(self, node):
        # Is there a better way?
        ret = (node[0] == 'ret_expr'
@@ -958,7 +910,6 @@ class SourceWalker(GenericASTTraversal, object):
                pass
            self.write(')')

-
    def n_LOAD_CONST(self, node):
        data = node.pattr; datatype = type(data)
        if isinstance(datatype, int) and data == minint:
@@ -1195,6 +1146,7 @@ class SourceWalker(GenericASTTraversal, object):
            assert n == 'lc_body'
        self.write( '[ ')
+
        if self.version >= 2.7:
            expr = n[0]
            list_iter = node[-1]
@@ -1207,9 +1159,19 @@ class SourceWalker(GenericASTTraversal, object):

        # FIXME: use source line numbers for directing line breaks

+       line_number = self.line_number
+       last_line = self.f.getvalue().split("\n")[-1]
+       l = len(last_line)
+       indent = ' ' * (l-1)
+
        self.preorder(expr)
+       line_number = self.indent_if_source_nl(line_number, indent)
        self.preorder(list_iter)
-       self.write( ' ]')
+       l2 = self.indent_if_source_nl(line_number, indent)
+       if l2 != line_number:
+           self.write(' ' * (len(indent) - len(self.indent) - 1) + ']')
+       else:
+           self.write( ' ]')
        self.prec = p
        self.prune() # stop recursing
@@ -1697,9 +1659,8 @@ class SourceWalker(GenericASTTraversal, object):
                self.write(sep)
                name = self.traverse(l[i], indent='')
                if i > 0:
-                   if (line_number != self.line_number):
-                       self.write("\n" + self.indent + INDENT_PER_LEVEL[:-1])
-                       pass
+                   line_number = self.indent_if_source_nl(line_number,
+                                                          self.indent + INDENT_PER_LEVEL[:-1])
                line_number = self.line_number
                self.write(name, ': ')
                value = self.traverse(l[i+1], indent=self.indent+(len(name)+2)*' ')
@@ -1724,9 +1685,8 @@ class SourceWalker(GenericASTTraversal, object):
                self.write(sep)
                name = self.traverse(l[i+1], indent='')
                if i > 0:
-                   if (line_number != self.line_number):
-                       self.write("\n" + self.indent + INDENT_PER_LEVEL[:-1])
-                       pass
+                   line_number = self.indent_if_source_nl(line_number,
+                                                          self.indent + INDENT_PER_LEVEL[:-1])
                    pass
                line_number = self.line_number
                self.write(name, ': ')
@@ -1755,13 +1715,12 @@ class SourceWalker(GenericASTTraversal, object):
            # kv3 ::= expr expr STORE_MAP

            # FIXME: DRY this and the above
+           indent = self.indent + "  "
            if kv == 'kv':
                self.write(sep)
                name = self.traverse(kv[-2], indent='')
                if first_time:
-                   if (line_number != self.line_number):
-                       self.write("\n" + self.indent + "  ")
-                       pass
+                   line_number = self.indent_if_source_nl(line_number, indent)
                    first_time = False
                    pass
                line_number = self.line_number
@@ -1771,9 +1730,7 @@ class SourceWalker(GenericASTTraversal, object):
                self.write(sep)
                name = self.traverse(kv[1], indent='')
                if first_time:
-                   if (line_number != self.line_number):
-                       self.write("\n" + self.indent + "  ")
-                       pass
+                   line_number = self.indent_if_source_nl(line_number, indent)
                    first_time = False
                    pass
                line_number = self.line_number
@@ -1783,9 +1740,7 @@ class SourceWalker(GenericASTTraversal, object):
                self.write(sep)
                name = self.traverse(kv[-2], indent='')
                if first_time:
-                   if (line_number != self.line_number):
-                       self.write("\n" + self.indent + "  ")
-                       pass
+                   line_number = self.indent_if_source_nl(line_number, indent)
                    first_time = False
                    pass
                line_number = self.line_number
@@ -1956,18 +1911,9 @@ class SourceWalker(GenericASTTraversal, object):
                    node[0].attr == 1):
                self.write(',')
            elif typ == 'c':
-               # FIXME: In Python3 sometimes like from
-               #   importfrom
-               #      importlist2
-               #        import_as
-               #           designator
-               #             STORE_NAME        'load_entry_point'
-               #     POP_TOP           ''     (2, (0, 1))
-               # we get that weird POP_TOP tuple, e.g (2, (0,1)).
-               # Why? and
-               # Is there some sort of invalid bounds access going on?
-               if isinstance(entry[arg], int):
-                   self.preorder(node[entry[arg]])
+               entry_node = node[entry[arg]]
+               self.preorder(entry_node)
                arg += 1
            elif typ == 'p':
                p = self.prec
@@ -2186,9 +2132,9 @@ class SourceWalker(GenericASTTraversal, object):
                docstring = ast[i][0][0][0][0].pattr
            except:
                docstring = code.co_consts[0]
-           self.print_docstring(indent, docstring)
-           self.println()
-           del ast[i]
+           if print_docstring(self, indent, docstring):
+               self.println()
+               del ast[i]


    # the function defining a class normally returns locals(); we
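
The change above makes the caller honor print_docstring's new return value: the helper returns False when co_consts[0] is not a string (for example a float or a code object), and in that case the statement must not be consumed from the AST. A small check of that behavior, assuming uncompyle6 with the new helper module is importable:

from uncompyle6.semantics.helper import print_docstring

class NullWriter:
    # Minimal stand-in for the walker's write/println interface
    def write(self, *args): pass
    def println(self, *args): pass

print(print_docstring(NullWriter(), '', 3.14))          # False: floats have no .find()
print(print_docstring(NullWriter(), '', 'docstring'))   # True: docstring was written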
@@ -2313,7 +2259,7 @@ def deparse_code(version, co, out=sys.stdout, showasm=None, showast=False,
    # convert leading '__doc__ = "..." into doc string
    try:
        if deparsed.ast[0][0] == ASSIGN_DOC_STRING(co.co_consts[0]):
-           deparsed.print_docstring('', co.co_consts[0])
+           print_docstring(deparsed, '', co.co_consts[0])
            del deparsed.ast[0]
        if deparsed.ast[-1] == RETURN_NONE:
            deparsed.ast.pop() # remove last node
@@ -317,7 +317,10 @@ def cmp_code_objects(version, is_pypy, code_obj1, code_obj2,
                i2 += 2
                continue
            elif tokens1[i1].type == 'LOAD_NAME' and tokens2[i2].type == 'LOAD_CONST' \
-                and tokens1[i1].pattr == 'None' and tokens2[i2].pattr == None:
+                and tokens1[i1].pattr == 'None' and tokens2[i2].pattr is None:
                pass
+           elif tokens1[i1].type == 'RETURN_VALUE' and \
+                tokens2[i2].type == 'RETURN_END_IF':
+               pass
            else:
                raise CmpErrorCode(name, tokens1[i1].offset, tokens1[i1],
@@ -353,6 +356,9 @@ def cmp_code_objects(version, is_pypy, code_obj1, code_obj2,
        if is_pypy:
            # For PYPY for now we don't care about PYPY_SOURCE_IS_UTF8:
            flags2 &= ~0x0100 # PYPY_SOURCE_IS_UTF8
+       # We also don't care about COROUTINE or GENERATOR for now
+       flags1 &= ~0x000000a0
+       flags2 &= ~0x000000a0
        if flags1 != flags2:
            raise CmpErrorMember(name, 'co_flags',
                                 pretty_flags(flags1),
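
The new mask in the hunk above clears two co_flags bits at once: 0x000000a0 is CO_GENERATOR (0x20) combined with CO_COROUTINE (0x80), so generator and coroutine status is ignored when comparing code objects. This can be checked against the constants in the standard inspect module (CO_COROUTINE exists on Python 3.5+):

import inspect

print(hex(inspect.CO_GENERATOR | inspect.CO_COROUTINE))   # 0xa0

def gen():
    yield 1

flags = gen.__code__.co_flags
print(bool(flags & inspect.CO_GENERATOR))                  # True
print(bool((flags & ~0x000000a0) & inspect.CO_GENERATOR))  # False: bit cleared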
@@ -1,3 +1,3 @@
 # This file is suitable for sourcing inside bash as
 # well as importing into Python
-VERSION='2.9.6'
+VERSION='2.9.8'