From d14865c1be536312effa39c589616da5ea9615d9 Mon Sep 17 00:00:00 2001 From: Daniel Bradburn Date: Thu, 28 Jul 2016 09:07:12 +0200 Subject: [PATCH 1/8] Starting adding python 3.6 support to uncompyle --- README.rst | 4 +-- test/Makefile | 10 +++++- test/bytecode_3.6/fstring.py | 5 +++ test/bytecode_3.6/fstring_single.py | 2 ++ test/test_pythonlib.py | 4 +-- uncompyle6/bin/uncompile.py | 2 +- uncompyle6/parser.py | 5 +++ uncompyle6/parsers/parse3.py | 49 +++++++++++++++++++++++++++++ uncompyle6/scanner.py | 2 +- uncompyle6/scanners/scanner36.py | 35 +++++++++++++++++++++ uncompyle6/semantics/pysource.py | 1 - uncompyle6/verify.py | 3 ++ 12 files changed, 114 insertions(+), 8 deletions(-) create mode 100644 test/bytecode_3.6/fstring.py create mode 100644 test/bytecode_3.6/fstring_single.py create mode 100644 uncompyle6/scanners/scanner36.py diff --git a/README.rst b/README.rst index 65b1dc97..19cf8d65 100644 --- a/README.rst +++ b/README.rst @@ -11,9 +11,9 @@ Introduction ------------ *uncompyle6* translates Python bytecode back into equivalent Python -source code. It accepts bytecodes from Python version 2.3 to 3.5 or +source code. It accepts bytecodes from Python version 2.3 to 3.6 or so. The code requires Python 2.6 or later and has been tested on Python -running versions 2.3-2.7, and 3.2-3.5. +running versions 2.3-2.7, and 3.2-3.6. Why this? --------- diff --git a/test/Makefile b/test/Makefile index 06036d1e..8f3cbe6b 100644 --- a/test/Makefile +++ b/test/Makefile @@ -38,6 +38,10 @@ check-3.4: check-bytecode check-3.4-ok check-2.7-ok check-3.5: check-bytecode $(PYTHON) test_pythonlib.py --bytecode-3.5 --verify $(COMPILE) +#: Run working tests from Python 3.6 +check-3.6: check-bytecode + $(PYTHON) test_pythonlib.py --bytecode-3.6 --verify $(COMPILE) + #: Check deparsing only, but from a different Python version check-disasm: $(PYTHON) dis-compare.py @@ -48,7 +52,7 @@ check-bytecode-2: #: Check deparsing bytecode 3.x only check-bytecode-3: - $(PYTHON) test_pythonlib.py --bytecode-3.2 --bytecode-3.3 --bytecode-3.4 --bytecode-3.5 + $(PYTHON) test_pythonlib.py --bytecode-3.2 --bytecode-3.3 --bytecode-3.4 --bytecode-3.5 --bytecode-3.6 #: Check deparsing bytecode that works running Python 2 and Python 3 check-bytecode: check-bytecode-3 @@ -90,6 +94,10 @@ check-bytecode-3.4: check-bytecode-3.5: $(PYTHON) test_pythonlib.py --bytecode-3.5 +#: Check deparsing Python 3.6 +check-bytecode-3.6: + $(PYTHON) test_pythonlib.py --bytecode-3.6 + #: short tests for bytecodes only for this version of Python check-native-short: $(PYTHON) test_pythonlib.py --bytecode-$(PYTHON_VERSION) --verify $(COMPILE) diff --git a/test/bytecode_3.6/fstring.py b/test/bytecode_3.6/fstring.py new file mode 100644 index 00000000..32b6fadc --- /dev/null +++ b/test/bytecode_3.6/fstring.py @@ -0,0 +1,5 @@ +def fn(var1, var2): + return f'interpolate {var1} strings {var2} py36' + + +fn('a', 'b') \ No newline at end of file diff --git a/test/bytecode_3.6/fstring_single.py b/test/bytecode_3.6/fstring_single.py new file mode 100644 index 00000000..7df632f5 --- /dev/null +++ b/test/bytecode_3.6/fstring_single.py @@ -0,0 +1,2 @@ +def fn(var): + return f'interpolate {var} strings' \ No newline at end of file diff --git a/test/test_pythonlib.py b/test/test_pythonlib.py index 4b537260..39f99ac7 100755 --- a/test/test_pythonlib.py +++ b/test/test_pythonlib.py @@ -72,13 +72,13 @@ test_options = { PYOC, 'base_2.7', 2.7), } -for vers in (2.7, 3.4, 3.5): +for vers in (2.7, 3.4, 3.5, 3.6): pythonlib = "ok_lib%s" % vers key = "ok-%s" % vers 
test_options[key] = (os.path.join(src_dir, pythonlib), PYOC, key, vers) pass -for vers in (2.3, 2.4, 2.5, 2.6, 2.7, 3.2, 3.3, 3.4, 3.5): +for vers in (2.3, 2.4, 2.5, 2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6): bytecode = "bytecode_%s" % vers key = "bytecode-%s" % vers test_options[key] = (bytecode, PYC, bytecode, vers) diff --git a/uncompyle6/bin/uncompile.py b/uncompyle6/bin/uncompile.py index 2d9b4031..dd93f799 100755 --- a/uncompyle6/bin/uncompile.py +++ b/uncompyle6/bin/uncompile.py @@ -64,7 +64,7 @@ def usage(): def main_bin(): - if not (sys.version_info[0:2] in ((2, 6), (2, 7), (3, 2), (3, 3), (3, 4), (3, 5))): + if not (sys.version_info[0:2] in ((2, 6), (2, 7), (3, 2), (3, 3), (3, 4), (3, 5), (3, 6))): print('Error: %s requires Python 2.6, 2.7, 3.2, 3.3, 3.4 or 3.5' % program, file=sys.stderr) sys.exit(-1) diff --git a/uncompyle6/parser.py b/uncompyle6/parser.py index bafc121c..8d775ab7 100644 --- a/uncompyle6/parser.py +++ b/uncompyle6/parser.py @@ -644,6 +644,11 @@ def get_python_parser( p = parse3.Python35onParser(debug_parser) else: p = parse3.Python35onParserSingle(debug_parser) + elif version >= 3.6: + if compile_mode == 'exec': + p = parse3.Python36Parser(debug_parser) + else: + p = parse3.Python36ParserSingle(debug_parser) else: if compile_mode == 'exec': p = parse3.Python3Parser(debug_parser) diff --git a/uncompyle6/parsers/parse3.py b/uncompyle6/parsers/parse3.py index a6aa2845..7338edf8 100644 --- a/uncompyle6/parsers/parse3.py +++ b/uncompyle6/parsers/parse3.py @@ -628,6 +628,52 @@ class Python35onParser(Python3Parser): """ + +class Python36Parser(Python3Parser): + + def p_36(self, args): + """ + + # Python 3.5+ has WITH_CLEANUP_START/FINISH + + withstmt ::= expr SETUP_WITH exprlist suite_stmts_opt + POP_BLOCK LOAD_CONST COME_FROM + WITH_CLEANUP_START WITH_CLEANUP_FINISH END_FINALLY + + withstmt ::= expr SETUP_WITH POP_TOP suite_stmts_opt + POP_BLOCK LOAD_CONST COME_FROM + WITH_CLEANUP_START WITH_CLEANUP_FINISH END_FINALLY + + withasstmt ::= expr SETUP_WITH designator suite_stmts_opt + POP_BLOCK LOAD_CONST COME_FROM + WITH_CLEANUP_START WITH_CLEANUP_FINISH END_FINALLY + + inplace_op ::= INPLACE_MATRIX_MULTIPLY + binary_op ::= BINARY_MATRIX_MULTIPLY + + # Python 3.5+ does jump optimization + # In <.3.5 the below is a JUMP_FORWARD to a JUMP_ABSOLUTE. + # in return_stmt, we will need the semantic actions in pysource.py + # to work out whether to dedent or not based on the presence of + # RETURN_END_IF vs RETURN_VALUE + + ifelsestmtc ::= testexpr c_stmts_opt JUMP_FORWARD else_suitec + return_stmt ::= ret_expr RETURN_END_IF + + + # Python 3.3+ also has yield from. 
3.5 does it
+        # differently than 3.3, 3.4
+
+        expr ::= yield_from
+        yield_from ::= expr GET_YIELD_FROM_ITER LOAD_CONST YIELD_FROM
+
+        # Python 3.4+ has more loop optimization that removes
+        # JUMP_FORWARD in some cases, and hence we also don't
+        # see COME_FROM
+        _ifstmts_jump ::= c_stmts_opt
+        """
+
+
 class Python3ParserSingle(Python3Parser, PythonParserSingle):
     pass

@@ -642,6 +688,9 @@ class Python33ParserSingle(Python33Parser, PythonParserSingle):
 class Python35onParserSingle(Python35onParser, PythonParserSingle):
     pass

+class Python36ParserSingle(Python36Parser, PythonParserSingle):
+    pass
+
 def info(args):
     # Check grammar
     # Should also add a way to dump grammar
diff --git a/uncompyle6/scanner.py b/uncompyle6/scanner.py
index 1c71aed4..23490e90 100755
--- a/uncompyle6/scanner.py
+++ b/uncompyle6/scanner.py
@@ -22,7 +22,7 @@ from uncompyle6 import PYTHON3
 from uncompyle6.scanners.tok import Token

 # The byte code versions we support
-PYTHON_VERSIONS = (2.3, 2.4, 2.5, 2.6, 2.7, 3.2, 3.3, 3.4, 3.5)
+PYTHON_VERSIONS = (2.3, 2.4, 2.5, 2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6)

 # FIXME: DRY
 if PYTHON3:
diff --git a/uncompyle6/scanners/scanner36.py b/uncompyle6/scanners/scanner36.py
new file mode 100644
index 00000000..fe212aea
--- /dev/null
+++ b/uncompyle6/scanners/scanner36.py
@@ -0,0 +1,35 @@
+# Copyright (c) 2016 by Rocky Bernstein
+"""
+Python 3.6 bytecode scanner/deparser
+
+This sets up opcodes for Python 3.6 and calls a generalized
+scanner routine for Python 3.
+"""
+
+from __future__ import print_function
+
+from uncompyle6.scanners.scanner3 import Scanner3
+
+# bytecode verification, verify(), uses JUMP_OPs from here
+from xdis.opcodes import opcode_36 as opc
+JUMP_OPs = map(lambda op: opc.opname[op], opc.hasjrel + opc.hasjabs)
+
+class Scanner36(Scanner3):
+
+    def __init__(self, show_asm=None):
+        Scanner3.__init__(self, 3.6, show_asm)
+        return
+    pass
+
+if __name__ == "__main__":
+    from uncompyle6 import PYTHON_VERSION
+    if PYTHON_VERSION == 3.6:
+        import inspect
+        co = inspect.currentframe().f_code
+        tokens, customize = Scanner36().disassemble(co)
+        for t in tokens:
+            print(t.format())
+        pass
+    else:
+        print("Need to be Python 3.6 to demo; I am %s."
% + PYTHON_VERSION) diff --git a/uncompyle6/semantics/pysource.py b/uncompyle6/semantics/pysource.py index 46f37db1..b00625f8 100644 --- a/uncompyle6/semantics/pysource.py +++ b/uncompyle6/semantics/pysource.py @@ -380,7 +380,6 @@ TABLE_DIRECT = { # Python 3.4+ Additions ####################### 'LOAD_CLASSDEREF': ( '%{pattr}', ), - } diff --git a/uncompyle6/verify.py b/uncompyle6/verify.py index 726a0a05..4411cbe0 100755 --- a/uncompyle6/verify.py +++ b/uncompyle6/verify.py @@ -207,6 +207,9 @@ def cmp_code_objects(version, code_obj1, code_obj2, name=''): elif version == 3.5: import uncompyle6.scanners.scanner35 as scan scanner = scan.Scanner35() + elif version == 3.6: + import uncompyle6.scanners.scanner36 as scan + scanner = scan.Scanner36() global JUMP_OPs JUMP_OPs = list(scan.JUMP_OPs) + ['JUMP_BACK'] From 5d27832d6f60d6f3dfecb267917b0a0d550a7994 Mon Sep 17 00:00:00 2001 From: DanielBradburn Date: Wed, 10 Aug 2016 08:20:51 +0200 Subject: [PATCH 2/8] resolve merge conflicts from pull request --- ChangeLog | 316 +++++++++++++++++- Makefile | 10 +- NEWS | 17 + README.rst | 24 +- __pkginfo__.py | 2 +- pytest/test_fjt.py | 4 +- pytest/test_grammar.py | 44 +++ pytest/testdata/if-2.7.right | 16 +- pytest/testdata/ifelse-2.7.right | 20 +- requirements.txt | 2 +- test/Makefile | 21 +- test/bytecode_2.3/09_whiletrue_bug.pyc | Bin 326 -> 338 bytes test/bytecode_2.6/09_whiletrue_bug.pyc | Bin 326 -> 327 bytes test/bytecode_2.7/05_abc_class.pyc | Bin 0 -> 502 bytes test/bytecode_2.7/06_setif_comprehension.pyc | Bin 0 -> 393 bytes test/bytecode_2.7/09_whiletrue_bug.pyc | Bin 0 -> 320 bytes ...rn_bug.pyc-notyet => 07_if_return_bug.pyc} | Bin test/bytecode_3.6/01_fstring.pyc | Bin 0 -> 234 bytes test/bytecode_pypy2.7/00_assign_pypy.pyc | Bin 0 -> 211 bytes test/bytecode_pypy2.7/01_assert2.pyc | Bin 0 -> 384 bytes test/bytecode_pypy2.7/01_fns.pyc | Bin 0 -> 155 bytes test/bytecode_pypy2.7/02_closure.pyc | Bin 0 -> 348 bytes test/bytecode_pypy2.7/02_complex.pyc | Bin 0 -> 154 bytes test/bytecode_pypy2.7/02_def.pyc | Bin 0 -> 624 bytes test/bytecode_pypy2.7/02_slice.pyc | Bin 0 -> 176 bytes test/bytecode_pypy2.7/03_if_elif.pyc | Bin 0 -> 255 bytes test/bytecode_pypy2.7/03_map.pyc | Bin 0 -> 738 bytes test/bytecode_pypy2.7/03_try_return.pyc | Bin 0 -> 551 bytes test/bytecode_pypy2.7/05_setattr.pyc | Bin 0 -> 357 bytes test/bytecode_pypy3.2/03_map.pyc | Bin 0 -> 742 bytes ..._abc_class.pyc-notyet => 05_abc_class.pyc} | Bin ..._const_map.pyc-notyet => 05_const_map.pyc} | Bin test/bytecode_pypy3.2/05_setattr.pyc | Bin 0 -> 402 bytes ...t_and.pyc-notyet => 06_list_ifnot_and.pyc} | Bin ....pyc-notyet => 06_setif_comprehension.pyc} | Bin ..._tryifelse.pyc-notyet => 06_tryifelse.pyc} | Bin test/simple_source/bug27+/05_setattr.py | 9 + test/simple_source/bug36/01_fstring.py | 3 + .../bug_pypy27/00_assign_pypy.py | 4 + test/simple_source/bug_pypy27/01_assert2.py | 17 + .../simple_source/bug_pypy27/03_try_return.py | 16 + test/simple_source/stmts/03_if_elif.py | 9 +- test/simple_source/stmts/09_whiletrue_bug.py | 7 +- test/test_pyenvlib.py | 1 + test/test_pythonlib.py | 4 +- uncompyle6/bin/uncompile.py | 2 +- uncompyle6/disas.py | 8 +- uncompyle6/main.py | 13 +- uncompyle6/parser.py | 90 ++--- uncompyle6/parsers/parse2.py | 224 ++++++++++--- uncompyle6/parsers/parse23.py | 6 +- uncompyle6/parsers/parse24.py | 1 + uncompyle6/parsers/parse25.py | 1 - uncompyle6/parsers/parse26.py | 20 +- uncompyle6/parsers/parse27.py | 53 ++- uncompyle6/parsers/parse3.py | 248 ++++++-------- uncompyle6/parsers/parse34.py | 25 +- 
uncompyle6/parsers/parse35.py | 78 +++++ uncompyle6/parsers/parse36.py | 52 +++ uncompyle6/scanner.py | 11 +- uncompyle6/scanners/pypy27.py | 26 ++ uncompyle6/scanners/pypy32.py | 26 ++ uncompyle6/scanners/scanner2.py | 74 +++- uncompyle6/scanners/scanner26.py | 6 +- uncompyle6/scanners/scanner27.py | 8 +- uncompyle6/scanners/scanner3.py | 100 ++++-- uncompyle6/scanners/scanner35.py | 2 +- uncompyle6/scanners/tok.py | 50 ++- uncompyle6/semantics/aligner.py | 147 ++++++++ uncompyle6/semantics/fragments.py | 60 ++-- uncompyle6/semantics/pysource.py | 190 +++++++---- uncompyle6/show.py | 4 +- uncompyle6/verify.py | 31 +- uncompyle6/version.py | 2 +- 74 files changed, 1584 insertions(+), 520 deletions(-) create mode 100644 pytest/test_grammar.py create mode 100644 test/bytecode_2.7/05_abc_class.pyc create mode 100644 test/bytecode_2.7/06_setif_comprehension.pyc create mode 100644 test/bytecode_2.7/09_whiletrue_bug.pyc rename test/bytecode_3.5/{07_if_return_bug.pyc-notyet => 07_if_return_bug.pyc} (100%) create mode 100644 test/bytecode_3.6/01_fstring.pyc create mode 100644 test/bytecode_pypy2.7/00_assign_pypy.pyc create mode 100644 test/bytecode_pypy2.7/01_assert2.pyc create mode 100644 test/bytecode_pypy2.7/01_fns.pyc create mode 100644 test/bytecode_pypy2.7/02_closure.pyc create mode 100644 test/bytecode_pypy2.7/02_complex.pyc create mode 100644 test/bytecode_pypy2.7/02_def.pyc create mode 100644 test/bytecode_pypy2.7/02_slice.pyc create mode 100644 test/bytecode_pypy2.7/03_if_elif.pyc create mode 100644 test/bytecode_pypy2.7/03_map.pyc create mode 100644 test/bytecode_pypy2.7/03_try_return.pyc create mode 100644 test/bytecode_pypy2.7/05_setattr.pyc create mode 100644 test/bytecode_pypy3.2/03_map.pyc rename test/bytecode_pypy3.2/{05_abc_class.pyc-notyet => 05_abc_class.pyc} (100%) rename test/bytecode_pypy3.2/{05_const_map.pyc-notyet => 05_const_map.pyc} (100%) create mode 100644 test/bytecode_pypy3.2/05_setattr.pyc rename test/bytecode_pypy3.2/{06_list_ifnot_and.pyc-notyet => 06_list_ifnot_and.pyc} (100%) rename test/bytecode_pypy3.2/{06_setif_comprehension.pyc-notyet => 06_setif_comprehension.pyc} (100%) rename test/bytecode_pypy3.2/{06_tryifelse.pyc-notyet => 06_tryifelse.pyc} (100%) create mode 100644 test/simple_source/bug27+/05_setattr.py create mode 100644 test/simple_source/bug36/01_fstring.py create mode 100644 test/simple_source/bug_pypy27/00_assign_pypy.py create mode 100644 test/simple_source/bug_pypy27/01_assert2.py create mode 100644 test/simple_source/bug_pypy27/03_try_return.py create mode 100644 uncompyle6/parsers/parse35.py create mode 100644 uncompyle6/parsers/parse36.py create mode 100644 uncompyle6/scanners/pypy27.py create mode 100644 uncompyle6/scanners/pypy32.py create mode 100644 uncompyle6/semantics/aligner.py diff --git a/ChangeLog b/ChangeLog index 96ab578b..4d246ccf 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,320 @@ +2016-08-03 rocky + + * uncompyle6/version.py: Get ready for release 2.8.0 + +2016-07-29 rocky + + * test/Makefile, uncompyle6/semantics/fragments.py: Small changes + +2016-07-29 rocky + + * uncompyle6/parsers/parse35.py, uncompyle6/scanner.py, + uncompyle6/scanners/scanner3.py, uncompyle6/semantics/pysource.py: + Fix 3.5 misclassifying RETURN_VALUE We use location of SETUP_EXCEPT instructions to disambiguate. 
+ +2016-07-28 rocky + + * uncompyle6/parsers/parse2.py, uncompyle6/parsers/parse23.py, + uncompyle6/parsers/parse24.py: while1 bug applied to Python 2.3 and + 2.4 + +2016-07-28 rocky + + * uncompyle6/scanners/scanner3.py, uncompyle6/scanners/tok.py: PyPy + 3.2 bug confusing RETURN_END_IF for except Also fix a instruction formatting bug + +2016-07-27 rocky + + * uncompyle6/parser.py, uncompyle6/parsers/parse3.py, + uncompyle6/parsers/parse35.py: Split out 3.5 parser + +2016-07-27 rocky + + * pytest/test_grammar.py, uncompyle6/parser.py, + uncompyle6/parsers/parse2.py, uncompyle6/parsers/parse3.py, + uncompyle6/parsers/parse34.py: Add python 3.4 grammar checking DRY grammar testing + +2016-07-27 rocky + + * uncompyle6/parsers/parse25.py, uncompyle6/parsers/parse26.py, + uncompyle6/parsers/parse27.py, uncompyle6/parsers/parse3.py, + uncompyle6/parsers/parse34.py: Clean and check Python 2.6 grammar + +2016-07-27 rocky + + * pytest/test_grammar.py, uncompyle6/parser.py, + uncompyle6/parsers/parse2.py, uncompyle6/parsers/parse26.py, + uncompyle6/parsers/parse27.py, uncompyle6/parsers/parse3.py: Start + to segregate and clean up grammar + +2016-07-27 rocky + + * pytest/test_grammar.py, uncompyle6/disas.py, + uncompyle6/scanner.py, uncompyle6/semantics/fragments.py: Add + is_pypy parameter to places that need it + +2016-07-27 rocky + + * test/simple_source/stmts/09_whiletrue_bug.py, + uncompyle6/parser.py, uncompyle6/parsers/parse2.py, + uncompyle6/parsers/parse26.py, uncompyle6/parsers/parse27.py, + uncompyle6/scanners/scanner2.py, uncompyle6/scanners/scanner3.py, + uncompyle6/semantics/pysource.py: 2.6 and 2.7 while1 grammar rule Fixes issue #40 + +2016-07-27 rocky + + * pytest/test_grammar.py, uncompyle6/parser.py, + uncompyle6/parsers/parse3.py: Start grammar checker + +2016-07-27 rocky + + * uncompyle6/main.py, uncompyle6/show.py: Show magic number in + output Fix bugs due to removal of token.format() + +2016-07-27 rocky + + * uncompyle6/disas.py, uncompyle6/parsers/parse2.py, + uncompyle6/parsers/parse3.py, uncompyle6/scanners/scanner2.py, + uncompyle6/scanners/scanner27.py, uncompyle6/scanners/scanner3.py, + uncompyle6/scanners/scanner35.py, uncompyle6/scanners/tok.py, + uncompyle6/show.py: tok.format -> tok.__str__; simplify pypy code + +2016-07-27 rocky + + * uncompyle6/semantics/pysource.py, uncompyle6/verify.py: Python 2.7 + set comprehension bug + +2016-07-27 rocky + + * uncompyle6/semantics/pysource.py: separate semantic action version + differences Added customize_for_version which uses is_pypy and version to adjust + tables + +2016-07-27 rocky + + * uncompyle6/semantics/fragments.py, + uncompyle6/semantics/pysource.py: Customize tables better for + specific Python versions + +2016-07-27 rocky + + * uncompyle6/parsers/parse2.py, uncompyle6/semantics/pysource.py: + Small code clean up + +2016-07-26 rocky + + * uncompyle6/scanners/tok.py, uncompyle6/semantics/fragments.py, + uncompyle6/verify.py: Usuability fixes * try using format for __str__ * Explicitly nuke self.attr and self.pattr when no arg * Sync pysource and format wrt make_function + +2016-07-26 rocky + + * ChangeLog, NEWS, README.rst, __pkginfo__.py, requirements.txt, + test/test_pyenvlib.py, uncompyle6/version.py: Get ready for release + 2.7.1 + +2016-07-26 rocky + + * test/simple_source/bug_pypy27/00_assign_pypy.py, + test/simple_source/bug_pypy27/03_try_return.py, uncompyle6/main.py, + uncompyle6/parsers/parse2.py, uncompyle6/scanners/scanner2.py, + uncompyle6/semantics/pysource.py: Custom PyPy rules for + tryfinallysmt, 
assign{2,3} + +2016-07-26 rocky + + * test/simple_source/bug_pypy27/01_assert2.py, + test/simple_source/bug_pypy27/03_try_return.py, + uncompyle6/parser.py, uncompyle6/parsers/parse2.py, + uncompyle6/parsers/parse27.py, uncompyle6/scanners/scanner2.py, + uncompyle6/scanners/scanner3.py, uncompyle6/semantics/pysource.py: + More PyPy grammar rules * assert one and two-arg form * trystmt Simplify adding multiple grammar rules + +2016-07-25 rocky + + * pytest/testdata/if-2.7.right, pytest/testdata/ifelse-2.7.right, + uncompyle6/scanners/tok.py: Instruction formatting - yet again + +2016-07-25 rocky + + * uncompyle6/parsers/parse2.py, uncompyle6/parsers/parse3.py, + uncompyle6/semantics/pysource.py: Add grammar for PyPy 2-arg assert + +2016-07-25 rocky + + * uncompyle6/parsers/parse2.py, uncompyle6/parsers/parse3.py, + uncompyle6/scanners/scanner2.py, uncompyle6/scanners/scanner3.py, + uncompyle6/scanners/tok.py, uncompyle6/semantics/pysource.py: PyPy + BUILD_MAP_n. Reinstate bytecode tests + +2016-07-25 rocky + + * uncompyle6/parser.py, uncompyle6/parsers/parse2.py: Handle PyPy + BUILD_MAP_0 where actual kw_args > 0 + +2016-07-25 rocky + + * uncompyle6/parsers/parse2.py, uncompyle6/parsers/parse3.py: Clean + up PyPy load_attr grammar rules + +2016-07-25 rocky + + * Makefile: Enable more PyPy testing + +2016-07-25 rocky + + * test/Makefile: Start checking PyPy bytecodes + +2016-07-25 rocky + + * test/Makefile, uncompyle6/scanners/pypy27.py, + uncompyle6/scanners/pypy32.py: Add pypy scanners + +2016-07-25 rocky + + * __pkginfo__.py, requirements.txt, test/Makefile, + test/simple_source/bug27+/05_setattr.py, + uncompyle6/parsers/parse2.py, uncompyle6/parsers/parse3.py, + uncompyle6/scanners/scanner2.py, uncompyle6/scanners/scanner27.py, + uncompyle6/scanners/scanner3.py: Handle PyPy CALL_METHOD op more + correctly Start testing pypy2.7 and 3.2 bytecodes + +2016-07-25 rocky + + * uncompyle6/parsers/parse2.py: add_custom_rules() in 2.x and 3.x + are more alike + +2016-07-25 rocky + + * README.rst, test/simple_source/stmts/03_if_elif.py, + uncompyle6/parsers/parse2.py, uncompyle6/parsers/parse27.py, + uncompyle6/parsers/parse3.py, uncompyle6/scanners/scanner2.py, + uncompyle6/scanners/scanner3.py, uncompyle6/semantics/pysource.py: + Handle PyPy JUMP_IF_NOT_DEBUG Update README.rst to note PyPY and reorganize a little + +2016-07-25 rocky + + * pytest/testdata/if-2.7.right, pytest/testdata/ifelse-2.7.right, + uncompyle6/scanners/scanner2.py, uncompyle6/scanners/scanner26.py, + uncompyle6/scanners/scanner3.py, uncompyle6/scanners/tok.py: Better + assembly formatting of jump instructions + +2016-07-24 rocky + + * Makefile, test/Makefile, uncompyle6/parsers/parse2.py, + uncompyle6/parsers/parse3.py, uncompyle6/scanners/scanner3.py: More + PyPy LOOKUP_METHOD rules + +2016-07-24 rocky + + * Makefile, __pkginfo__.py, pytest/test_fjt.py, requirements.txt, + test/Makefile, test/test_pythonlib.py, + uncompyle6/parsers/parse2.py, uncompyle6/parsers/parse3.py, + uncompyle6/scanner.py, uncompyle6/scanners/scanner2.py, + uncompyle6/scanners/scanner3.py, uncompyle6/semantics/pysource.py: + PyPy support * Use proper PYPY 32 opcodes * handle opcodes LOOKUP_METHOD and CALL_METHOD * Administrative stuff for PyPy + +2016-07-24 rocky + + * test/add-test.py: add-test: Make sure PyPy bytecode is separated + +2016-07-24 rocky + + * : commit 21683719e1a07d51095d32200ec294d659746474 Author: rocky + Date: Sun Jul 24 04:16:54 2016 -0400 + +2016-07-24 rocky + + * : commit 7e8173b07620c344a73660b354f05a61c4723d18 Author: rocky + 
Date: Sun Jul 24 03:44:26 2016 -0400 + +2016-07-23 rocky + + * test/simple_source/bug27+/05_for_try_except.py, + uncompyle6/scanners/scanner2.py, uncompyle6/scanners/scanner27.py: + Another 2.7 'continue' detection bug + +2016-07-23 rocky + + * test/simple_source/bug27+/05_for_try_except.py, + uncompyle6/scanners/scanner2.py, uncompyle6/scanners/scanner27.py: + Another 2.7 'continue' detection bug + +2016-07-23 rocky + + * test/simple_source/bug27+/05_for_try_except.py, + uncompyle6/scanners/scanner2.py, uncompyle6/scanners/scanner27.py: + Another 2.7 'continue' detection bug + +2016-07-23 rocky + + * test/simple_source/bug27+/05_for_try_except.py, + uncompyle6/scanners/scanner2.py: 2.7: Detect "continue" inside + except Fixes issue #38. This is a bit hacky. We need a more general "continue" detection. + +2016-07-23 rocky + + * : commit a5f45f232decad2e74bfdf476255604273fd95fd Author: rocky + Date: Sat Jul 23 10:37:41 2016 -0400 + +2016-07-21 rocky + + * __pkginfo__.py, test/test_pyenvlib.py, uncompyle6/disas.py, + uncompyle6/main.py, uncompyle6/parser.py, + uncompyle6/parsers/parse27.py, uncompyle6/scanner.py, + uncompyle6/scanners/scanner2.py, uncompyle6/scanners/scanner27.py, + uncompyle6/semantics/fragments.py, + uncompyle6/semantics/pysource.py, uncompyle6/verify.py: Start + handling pypy 2.7 Need to understand whether we care compiling pypy. Pypy 2.7 list + comprehensions are different and use its own opcode. + +2016-07-20 rocky + + * HISTORY.md, README.rst: Update HISTORY and add link to it in + README.md + +2016-07-17 rocky + + * uncompyle6/parser.py, uncompyle6/semantics/pysource.py: Better + parse error formatting Start to move away for compiler-oriented terminology: Favor "instructions" over "tokens". Syntax error -> Parse error. + +2016-07-17 rocky + + * pytest/testdata/if-2.7.right, pytest/testdata/ifelse-2.7.right, + uncompyle6/scanners/scanner2.py, uncompyle6/scanners/tok.py: Align + disassembly output with xdis align number of offset fields with xdis. Show None type when we + mean None, not '' + +2016-07-17 rocky + + * uncompyle6/scanners/scanner3.py: Respect after/both option in + scanner3 + +2016-07-17 rocky + + * pytest/testdata/if-2.7.right, pytest/testdata/ifelse-2.7.right: + Adjust test data for changed disasm output + +2016-07-16 rocky + + * : commit 942b15e3c62963b32aaba877fd451ffe304c07a9 Author: rocky + Date: Sat Jul 16 14:12:31 2016 -0400 + +2016-07-16 rocky + + * README.rst: More explicit usage info + 2016-07-15 rocky - * README.rst, uncompyle6/version.py: Get ready for release 2.7.0 + * README.rst, test/Makefile, + test/simple_source/bug35/01_matrix_multiply.py, + uncompyle6/parsers/parse3.py, uncompyle6/semantics/pysource.py: Add + 3.5 matrix mult ops We now run 3.5 verifycation so we need to remove some of the tests + that fail to verify pending fixing. 
+ +2016-07-15 rocky + + * ChangeLog, NEWS, README.rst, uncompyle6/version.py: Get ready for + release 2.7.0 2016-07-14 rocky diff --git a/Makefile b/Makefile index 01ae79ca..910e17d6 100644 --- a/Makefile +++ b/Makefile @@ -33,13 +33,21 @@ check-2.7 check-3.3 check-3.4: pytest #: Tests for Python 3.2 and 3.5 - pytest doesn't work here # Or rather 3.5 doesn't work not on Travis -check-3.2 check-3.5: +check-3.2 check-3.5 check-3.6: $(MAKE) -C test $@ #:Tests for Python 2.6 (doesn't have pytest) check-2.6: $(MAKE) -C test $@ +#:PyPy 2.6.1 or PyPy 5.0.1 +# Skip for now +2.6 5.0: + +#:PyPy pypy3-2.4.0 Python 3: +pypy-3.2 2.4: + $(MAKE) -C test $@ + #: Run py.test tests pytest: $(MAKE) -C pytest check diff --git a/NEWS b/NEWS index df7bfdb7..5b87b486 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,20 @@ +uncompyle6 2.8.0 2016-08-03 + +- Start Python 3.6 support (moagstar) + more work on PEP 498 needed +- tidy bytecode/word output +- numerous decompiling bugs fixed +- grammar testing started +- show magic number in deparsed output +- better grammar and semantic action segregation based + on python bytecode version + +uncompyle6 2.7.1 2016-07-26 + +- PyPy bytecodes for 2.7 and 3.2 added +- Instruction formatting improved slightly +- 2.7 bytecode "continue" bug fixed + uncompyle6 2.7.0 2016-07-15 - Many Syntax and verifification bugs removed diff --git a/README.rst b/README.rst index 19cf8d65..b0a6f19a 100644 --- a/README.rst +++ b/README.rst @@ -12,20 +12,19 @@ Introduction *uncompyle6* translates Python bytecode back into equivalent Python source code. It accepts bytecodes from Python version 2.3 to 3.6 or -so. The code requires Python 2.6 or later and has been tested on Python -running versions 2.3-2.7, and 3.2-3.6. +so, including PyPy bytecode. Why this? --------- There were a number of decompyle, uncompile, uncompyle2, uncompyle3 -forks around. All of them come basically from the same code base, and -almost all of them no longer maintained or worked on. Only one handled -Python 3, and even there, only 3.2. This code pulls these together, -handles a wide range of bytecodes and addresses a number of open -issues in previous forks. +forks around. All of them came basically from the same code base, and +almost all of them no were no longer actively maintained. Only one +handled Python 3, and even there, only 3.2. This code pulls these +together and moves forward. It also addresses a number of open issues +in the previous forks. -What makes this different from other CPython bytecode decompilers? Its +What makes this different from other CPython bytecode decompilers?: its ability to deparse just fragments and give source-code information around a given bytecode offset. @@ -41,6 +40,13 @@ location in more detail than just a line number. It can be also used when source-code information does not exist and there is just bytecode information. +Requirements +------------ + +This project requires Python 2.6 or later, PyPy 3-2.4, or PyPy-5.0.1. +The bytecode files it can read has been tested on Python bytecodes from +versions 2.3-2.7, and 3.2-3.6 and the above-mentioned PyPy versions. + Installation ------------ @@ -90,7 +96,7 @@ For usage help: Known Bugs/Restrictions ----------------------- -Python 2 deparsing decompiles and verifies from Python 2.3.7 to Python +Python 2 deparsing decompiles and about 90% verifies from Python 2.3.7 to Python 3.4.2 on the standard library packages I have on my system. 
(Verification is the process of decompiling bytecode, compiling with a diff --git a/__pkginfo__.py b/__pkginfo__.py index 08c75f34..446e5f02 100644 --- a/__pkginfo__.py +++ b/__pkginfo__.py @@ -37,7 +37,7 @@ entry_points={ ]} ftp_url = None install_requires = ['spark-parser >= 1.4.0', - 'xdis >= 2.0.0'] + 'xdis >= 2.1.0'] license = 'MIT' mailing_list = 'python-debugger@googlegroups.com' modname = 'uncompyle6' diff --git a/pytest/test_fjt.py b/pytest/test_fjt.py index fdf2c4b8..03250975 100644 --- a/pytest/test_fjt.py +++ b/pytest/test_fjt.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -from uncompyle6 import PYTHON_VERSION +from uncompyle6 import PYTHON_VERSION, IS_PYPY from uncompyle6.scanner import get_scanner from array import array def bug(state, slotstate): @@ -12,7 +12,7 @@ def test_if_in_for(): code = bug.__code__ scan = get_scanner(PYTHON_VERSION) print(PYTHON_VERSION) - if 2.7 <= PYTHON_VERSION <= 3.0: + if 2.7 <= PYTHON_VERSION <= 3.0 and not IS_PYPY: n = scan.setup_code(code) scan.build_lines_data(code, n) scan.build_prev_op(n) diff --git a/pytest/test_grammar.py b/pytest/test_grammar.py new file mode 100644 index 00000000..ebcdaab2 --- /dev/null +++ b/pytest/test_grammar.py @@ -0,0 +1,44 @@ +import pytest, re +from uncompyle6 import PYTHON_VERSION, PYTHON3, IS_PYPY # , PYTHON_VERSION +from uncompyle6.parser import get_python_parser +from uncompyle6.scanner import get_scanner + +def test_grammar(): + + def check_tokens(tokens, opcode_set): + remain_tokens = set(tokens) - opcode_set + remain_tokens = set([re.sub('_\d+$','', t) for t in remain_tokens]) + remain_tokens = set([re.sub('_CONT$','', t) for t in remain_tokens]) + remain_tokens = set(remain_tokens) - opcode_set + assert remain_tokens == set([]), \ + "Remaining tokens %s\n====\n%s" % (remain_tokens, p.dumpGrammar()) + + p = get_python_parser(PYTHON_VERSION, is_pypy=IS_PYPY) + lhs, rhs, tokens, right_recursive = p.checkSets() + expect_lhs = set(['expr1024', 'pos_arg']) + unused_rhs = set(['build_list', 'call_function', 'mkfunc', 'mklambda', + 'unpack', 'unpack_list']) + expect_right_recursive = [['designList', ('designator', 'DUP_TOP', 'designList')]] + if PYTHON3: + expect_lhs.add('load_genexpr') + unused_rhs = unused_rhs.union(set(""" + except_pop_except genexpr classdefdeco2 listcomp + """.split())) + else: + expect_lhs.add('kwarg') + assert expect_lhs == set(lhs) + assert unused_rhs == set(rhs) + assert expect_right_recursive == right_recursive + s = get_scanner(PYTHON_VERSION, IS_PYPY) + ignore_set = set( + """JUMP_BACK CONTINUE RETURN_END_IF COME_FROM + LOAD_GENEXPR LOAD_ASSERT LOAD_SETCOMP LOAD_DICTCOMP + LAMBDA_MARKER RETURN_LAST + """.split()) + if 2.6 <= PYTHON_VERSION <= 2.7: + opcode_set = set(s.opc.opname).union(ignore_set) + check_tokens(tokens, opcode_set) + elif PYTHON_VERSION == 3.4: + ignore_set.add('LOAD_CLASSNAME') + opcode_set = set(s.opc.opname).union(ignore_set) + check_tokens(tokens, opcode_set) diff --git a/pytest/testdata/if-2.7.right b/pytest/testdata/if-2.7.right index 8c258cad..61eecf86 100644 --- a/pytest/testdata/if-2.7.right +++ b/pytest/testdata/if-2.7.right @@ -1,12 +1,12 @@ # Python 2.7 # Embedded file name: simple_source/branching/05_if.py - 6 0 LOAD_NAME 0 'True' - 3 POP_JUMP_IF_FALSE 15 '15' + 6 0 LOAD_NAME 0 'True' + 3 POP_JUMP_IF_FALSE 15 'to 15' - 7 6 LOAD_NAME 1 'False' - 9 STORE_NAME 2 'b' - 12 JUMP_FORWARD 0 '15' - 15_0 COME_FROM '12' - 15 LOAD_CONST 0 None - 18 RETURN_VALUE + 7 6 LOAD_NAME 1 'False' + 9 STORE_NAME 2 'b' + 12 JUMP_FORWARD 0 'to 15' + 15_0 COME_FROM '12' + 15 LOAD_CONST 
0 '' + 18 RETURN_VALUE diff --git a/pytest/testdata/ifelse-2.7.right b/pytest/testdata/ifelse-2.7.right index 1f58e666..d6ee0b34 100644 --- a/pytest/testdata/ifelse-2.7.right +++ b/pytest/testdata/ifelse-2.7.right @@ -1,15 +1,15 @@ # Python 2.7 # Embedded file name: simple_source/branching/05_ifelse.py - 3 0 LOAD_NAME 0 'True' - 3 POP_JUMP_IF_FALSE 15 '15' + 3 0 LOAD_NAME 0 'True' + 3 POP_JUMP_IF_FALSE 15 'to 15' - 4 6 LOAD_CONST 0 1 - 9 STORE_NAME 1 'b' - 12 JUMP_FORWARD 6 '21' + 4 6 LOAD_CONST 0 1 + 9 STORE_NAME 1 'b' + 12 JUMP_FORWARD 6 'to 21' - 6 15 LOAD_CONST 1 2 - 18 STORE_NAME 2 'd' - 21_0 COME_FROM '12' - 21 LOAD_CONST 2 None - 24 RETURN_VALUE + 6 15 LOAD_CONST 1 2 + 18 STORE_NAME 2 'd' + 21_0 COME_FROM '12' + 21 LOAD_CONST 2 '' + 24 RETURN_VALUE diff --git a/requirements.txt b/requirements.txt index 6a5b2f51..f56c1fe2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ spark-parser >= 1.2.1 -xdis >= 2.0.0 +xdis >= 2.1.0 diff --git a/test/Makefile b/test/Makefile index 8f3cbe6b..d4db0489 100644 --- a/test/Makefile +++ b/test/Makefile @@ -48,15 +48,18 @@ check-disasm: #: Check deparsing bytecode 2.x only check-bytecode-2: - $(PYTHON) test_pythonlib.py --bytecode-2.3 --bytecode-2.4 --bytecode-2.5 --bytecode-2.6 --bytecode-2.7 + $(PYTHON) test_pythonlib.py --bytecode-2.3 --bytecode-2.4 \ + --bytecode-2.5 --bytecode-2.6 --bytecode-2.7 --bytecode-pypy2.7 #: Check deparsing bytecode 3.x only check-bytecode-3: - $(PYTHON) test_pythonlib.py --bytecode-3.2 --bytecode-3.3 --bytecode-3.4 --bytecode-3.5 --bytecode-3.6 + $(PYTHON) test_pythonlib.py --bytecode-3.2 --bytecode-3.3 \ + --bytecode-3.4 --bytecode-3.5 --bytecode-3.6 --bytecode-pypy3.2 #: Check deparsing bytecode that works running Python 2 and Python 3 check-bytecode: check-bytecode-3 - $(PYTHON) test_pythonlib.py --bytecode-2.3 --bytecode-2.4 --bytecode-2.5 --bytecode-2.6 --bytecode-2.7 + $(PYTHON) test_pythonlib.py --bytecode-2.3 --bytecode-2.4 \ + --bytecode-2.5 --bytecode-2.6 --bytecode-2.7 --bytecode-pypy2.7 #: Check deparsing Python 2.3 check-bytecode-2.3: @@ -118,6 +121,18 @@ check-3.2-ok: check-3.4-ok: $(PYTHON) test_pythonlib.py --ok-3.4 --verify $(COMPILE) +#: PyPy of some sort. E.g. [PyPy 5.0.1 with GCC 4.8.4] +# Skip for now +2.6: + +#: PyPy 5.0.x with Python 2.7 ... +pypy-2.7 5.0 5.3: + $(PYTHON) test_pythonlib.py --bytecode-pypy2.7 --verify + +#: PyPy 2.4.x with Python 3.2 ... 
+pypy-3.2 2.4: + $(PYTHON) test_pythonlib.py --bytecode-pypy3.2 --verify + clean: clean-py-dis clean-dis clean-unverified clean-dis: diff --git a/test/bytecode_2.3/09_whiletrue_bug.pyc b/test/bytecode_2.3/09_whiletrue_bug.pyc index 09b3a9bb7ebc8f9814e09847782367390f220b4e..0c2764967a8ed406bf64978eb41c78c6a2edf4ef 100644 GIT binary patch delta 183 zcmX@cbcspY`V%kL&DS%+lNq3Z6-YY(adF{9X%!y^h7?AIR7QqoMn=Z)LSY7$RAz=O zE{1$w2A}|A3KK&V3jv)d3p0SS6Kj-IL8cdT z0Ezhcw9K5;`1oQLASa}#G_{x=$Vkr5Ey&E7cqUCs49F8;WM*JzWM&j$cT%0sfTE&)`A%&44m64&Dk&!XHP?&)wm4zXT ziy@zv0Vu$j!o(28$^aB*i~tegdF%{Ou~b$DcaMqPO1x|!qga4MVo`eW#C>Ai96)wj hW=?8eV(!FCX(9qZt^gx513M!#qX;7(qX-ilBLEN^8Ik}1 diff --git a/test/bytecode_2.6/09_whiletrue_bug.pyc b/test/bytecode_2.6/09_whiletrue_bug.pyc index d085eac17bd77ce8f642ad5534e190bf82b35b1e..a5cd8c5924292bc7e3893f2af8fddbf837f2e7b2 100644 GIT binary patch delta 172 zcmX@cbeu{0;wN6No3CetCo@0+JCJq&;^K^n(kjjj3@MBZsf-NGjEs!og~ALhsmu&n zTnzcV3_tsQs7y%-}^Vk`nVyUbQ?j94Hl=RquMwPGtiNvDxVlX45s5G?% uWJ_{>Zb4>FY6%CBod#s&CFV{%nI<9tQ+4|2!aQ10m1d+O}u#DLst-sFr;aUg>+k*^dNZCKc~OJf8if+ zCaE6>12dUq-n{pc>~lZ+csRS$Fs*dj-7ysvfB~olR03WBUI6Y3IwY``jPnPSRO$Sn zwAWTS*BbxWRX=2c`U!Tap2_+BkEC2=ggtgyN zKE4YOG#=+xa>h7Lx5H@sAr**?`!*H*Ao?w8xUn~b--(b-;}IoGhG}PP9a>sht4eDm zbs!{vN|kHc-Wqh>IB2u1Ouf9gL{(}G&pz*;a`v#tNG?Dugk;T0ZlhX5E;717Y`bbC vF3kDmIa>(Bzlx#Cx4A(93yQ!x7xQElJ@3-ppC1pn{|JKqRaCqVM9laFPcT;F literal 0 HcmV?d00001 diff --git a/test/bytecode_2.7/06_setif_comprehension.pyc b/test/bytecode_2.7/06_setif_comprehension.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a5d2ff1a912424040f662239fd413f7ec494f7af GIT binary patch literal 393 zcmYLF!AiqG5Ph3!H3biXC(j;&LRviNrFc<1deDO)%Cg<8Z6Il4W+D{YQ~d(}N`J>6 za3-y|FmHx^`{vE;;OAgKhl@v#?dfeZ9ZQh`A3y-U48DPzwZT(_%-|u3g!V>21$~KY z2}%WKz!G>l!E=O_!72jFjmQ8tSohGJVrlRh;jJ%?vRVS(NYt2hjC901lIBiSO250J4T%)!fORO!%en}bs(GDO>FKqzWP87sdj$jnsX?r6aFnh|1)&e Uy?zt#3yjMuYm2bytSj`*FUgZk=Kufz literal 0 HcmV?d00001 diff --git a/test/bytecode_2.7/09_whiletrue_bug.pyc b/test/bytecode_2.7/09_whiletrue_bug.pyc new file mode 100644 index 0000000000000000000000000000000000000000..10a6b1c4a0604d1194466af76457c7e46db289bf GIT binary patch literal 320 zcmYL@!A`?442FL<-3AjAV%j^<3ydogLU5RX0~{Dq5ga0D%&4|5B}rjB^Afxo&%luv zz$r*X@@M6uP>`w26@}Kg2z5o!H*9P p_1kltBXjj*GuyA%?b4&lnr6Yr@fJT6Y||zw8qt7uk}h?W!Viz_KW+d3 literal 0 HcmV?d00001 diff --git a/test/bytecode_3.5/07_if_return_bug.pyc-notyet b/test/bytecode_3.5/07_if_return_bug.pyc similarity index 100% rename from test/bytecode_3.5/07_if_return_bug.pyc-notyet rename to test/bytecode_3.5/07_if_return_bug.pyc diff --git a/test/bytecode_3.6/01_fstring.pyc b/test/bytecode_3.6/01_fstring.pyc new file mode 100644 index 0000000000000000000000000000000000000000..36003df6b50b9b773b4ec27bf6e93df722d0e62e GIT binary patch literal 234 zcmY%7<>ktKIWOFufq~&M5W@jGkmUfx#TGy!g@GXoNHQ`+F*2kwF{CgtWHB?OFf*hw zFw6#uvoNGGf*Gs~>8uRRj3C6A!p0D+$$pEm;ud4&Eru$d%)FA+qJsRK#FA8nDo%ys zlA_GK^kRi7R)vB}V>3TZmME68#3Dm5X>^OV04QB@izO>RGjAnB5gX7EVB(izab|8o xPHKE{erZv1s(w;wy0MwQfnj_a#6Z1*%3B;Zx%nxjIjMGxAosHX2{uMHMgV95IKcn_ literal 0 HcmV?d00001 diff --git a/test/bytecode_pypy2.7/00_assign_pypy.pyc b/test/bytecode_pypy2.7/00_assign_pypy.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7b2106789ba0e8b072abe339251298bddae06a59 GIT binary patch literal 211 zcmd=3%*$1KY0%`yBL#>U8KM{%q8LGp6eflgW`-yxhA1G(!jQtk5Ujxp zRF(yNsTYgFD*(= y)lVu-k1wb!s5COyH!z4#EH2JW&jSnS6;zgR0F~S1=BJeAq}qY3ECyN5!w3MT@+7VR literal 0 HcmV?d00001 diff --git a/test/bytecode_pypy2.7/01_assert2.pyc 
b/test/bytecode_pypy2.7/01_assert2.pyc new file mode 100644 index 0000000000000000000000000000000000000000..83e3cef1b02309fb9d2fc793c2517587c43db331 GIT binary patch literal 384 zcmYLE!AiqG5S?ifmC%Y51f?LsgY?waUc3~+LknK4&_l(8OPU=eG}(>28)~lgBmAlU zjX&TdEx3obGjHB|vm5?)!ma*UCU7{x;kG3gy9*!y39$CS_P`-Xgl~i`!q)<-5M&Ep zTd-7J@X3R92xbC^_^c3K$Oj5)H7j;qpca_7Cc-;J?Ai^P*CZ!?UcplOu;|xMu*VES z_ULk~mjV1R8r$3M5yP#gdN literal 0 HcmV?d00001 diff --git a/test/bytecode_pypy2.7/01_fns.pyc b/test/bytecode_pypy2.7/01_fns.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2b2d8ef41604e0ce2c75506d7f5be963fa3142c7 GIT binary patch literal 155 zcmd=3%**vRtRXC!0SXwQbg>YSk;=f(%)r2y$_S=X7#M;zK=OVXOhB@P9Z00-NO z6qm38**W>iK&A$e#eg76WP#k`%-n*U)cE53(xT*4eW0qGVtoU{__VxYy@JXT4xkE~ S-29Z%oK!oI@x>t1*%<*Acpb3- literal 0 HcmV?d00001 diff --git a/test/bytecode_pypy2.7/02_closure.pyc b/test/bytecode_pypy2.7/02_closure.pyc new file mode 100644 index 0000000000000000000000000000000000000000..174202c0ff61cb054558312d46f74ee886373381 GIT binary patch literal 348 zcmYk1O^O0B6oe}}pa?Rf9>A?Dm01sS1Q#v1a3h#bi_#kDqz4AvDgGS8qj?4|;H$J~ z2U4k*_f>ux_h#dEJx?^ALp@j56cYmnU@Aso+^fjWi(^rh={h1OAW?*lz$L^AdZc+A zCvlPi-vPFbGazOK?0Gn$QBeOOHZ_TVR~s9fqN+(x+s&WJd`~QVSHvyX+tkdQ%`o-)rDU$Qtp1?sa>yewi_3bI*b>f(OZ MHqKM?8-i>-0Lc6t8I8P0~9bq>0&M*BZYw>3P>_C1Zyw>xs^~kC?(?HrvXx40wO9jK%xku zL=DI-&de>yNsTYgFD*(=)laP`C`v6Z&dkr#H!zA%&Iid==oM6!Z~&FsM{H>i!)M3rpqGDrY}o;fPK|OBPCmBG)3P>7;1Mg5N;27Tp#dkn7Mq zdf@geRPo}IhYt$qJVIs`aiE)!zi`fP(rps2e75_w@zBLT!_j!f(_nkZVQ@b}zZb8{ zLRaJNE-1(mzUvJQFR1{&!dVI2v|0b3YG9)gI)1F7RPPG?L$+K`8Ea7D+8U8|eqw;NKdYt?n09+$TPKq?y|#@f3|7 LiBN|@U-$I|Nq{?H literal 0 HcmV?d00001 diff --git a/test/bytecode_pypy2.7/02_slice.pyc b/test/bytecode_pypy2.7/02_slice.pyc new file mode 100644 index 0000000000000000000000000000000000000000..37142bddec1f031386c1c9fb9cdca8ead3f1da15 GIT binary patch literal 176 zcmd=3%*&PcqarMs0ScI*bg?p!kpe`F3@JC6mK3=FAYt^y;3mS;?1W(d|`0jkSn z1QARi0;Jzh1H>-@F%yd_HGmWYf+&#(a*H!_3vyE9i}Op1l2i4Ib25RXfl)k|p;u5@ Z!U0rclbfGXnv-e=GOHM53qK0%!sBb9-nNRc6xk)cQw%ob(H;{-G081fiE2&6!ZAx{ph z#)ILE5J;rh6~qOSB}@!Wj38wx3=F{E96q3o zV{vh6QAuWgo@-H2evt-HgaJX6C<3{~nYjf(<;D4>Mail9#U;5V#rg)u@tJAysX3Wx mdIgnWLv3>NQ%ZAE?N~rMgn$GaBM&1pBO4~h06xgfu zfoY{598dMGyb$u{D+c>?7uUUh|j-TG;TZq literal 0 HcmV?d00001 diff --git a/test/bytecode_pypy2.7/03_try_return.pyc b/test/bytecode_pypy2.7/03_try_return.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e587aeea1bae9ebd4db9873cc0f862b8a5059dc5 GIT binary patch literal 551 zcmYjN!AiqG5S>k`wZ$H!;2$Uv!HOQdh!>IKrDA$eyew_9Dba1R>`suB-t-^*7{8$X z9e==?ZKMX~2)8!zn&6kKGFQ$A(0f|6X1o2@I*iwz&Hx2lmrAEI~2DR z?v$YJ0|G1sEC~T#1$=}sZeaj#U9iU>geU|&Q{QpKlSaN5wuqKS6}U%4POHr23x#bu z+&|n|TiS>x`D;$;40q||(B{P~A_gAtgp>K)XeC`)*+dPd)l8b&)E8HS^Gk`gmR4b9 zi!)REUdcpjUf=Op5TWRcp76q0Ay599DQgyzE$qtIUuSu36DHt6cC|Armbsm}vaV7)c&wj}PWK0gC)0NM t!Ez=46X-R+RHPx95JG9Dg)DJNEzp literal 0 HcmV?d00001 diff --git a/test/bytecode_pypy3.2/03_map.pyc b/test/bytecode_pypy3.2/03_map.pyc new file mode 100644 index 0000000000000000000000000000000000000000..13c3edded150a0257410d4f9e41e14b1997f365d GIT binary patch literal 742 zcmYk(X-^YT7{&2Z%33IUSqlh)){WK{iVGqrZYa2*qP%Ge&m^3rwG+lB`U!pGC-GBw zF7Z)kx&PlJb7yjAO2}q@{x~~2#eZ8`-qxA?(m=p;q)@dESX$|Xbt&Dj9;Fx7r(|IL z$^a~@48n$#Vc3W=3L8_#VL4?2HmOX(rj;4ktdfV#Df6%eV za@#2lJ6iM~?y=8#$Wdp7v(85hGcKmRWXAcPW6m|^osZe!e8qm}2aY?}Ip^GE_zf4& z-RFSwBXiCw=bca3={#W8dBh3l1{a)9+2wrALFXqfq zcD`V*^BqT=WzL8Ph48tOu%OyWrCe{sXZ4flYeCoeHzZ5pCN|?nR7+az^V6speT&bd oBw_1Iu~exw>+x#*JqhpGY__c3h_=F~O~M~ar}BUITq^9p0q?DC%m4rY 
literal 0 HcmV?d00001 diff --git a/test/bytecode_pypy3.2/05_abc_class.pyc-notyet b/test/bytecode_pypy3.2/05_abc_class.pyc similarity index 100% rename from test/bytecode_pypy3.2/05_abc_class.pyc-notyet rename to test/bytecode_pypy3.2/05_abc_class.pyc diff --git a/test/bytecode_pypy3.2/05_const_map.pyc-notyet b/test/bytecode_pypy3.2/05_const_map.pyc similarity index 100% rename from test/bytecode_pypy3.2/05_const_map.pyc-notyet rename to test/bytecode_pypy3.2/05_const_map.pyc diff --git a/test/bytecode_pypy3.2/05_setattr.pyc b/test/bytecode_pypy3.2/05_setattr.pyc new file mode 100644 index 0000000000000000000000000000000000000000..09de469250c00fe3c8305ce5cbfe25afaae418b5 GIT binary patch literal 402 zcmb7AO-sW-5Ph4dwWaw7g5W7A*rJFR5%J^TrI#KGVq0puNN8eeW+N8TQ~eG8C;e5v z*(!Q<*m=yGnSF2BE~3`x_}o8(CMCx tzr_#d3sDIU)LdEwm@U&b5!$8~>nHsz{3`!Tewz|!c`>ard+t5d;tTdiNrwOc literal 0 HcmV?d00001 diff --git a/test/bytecode_pypy3.2/06_list_ifnot_and.pyc-notyet b/test/bytecode_pypy3.2/06_list_ifnot_and.pyc similarity index 100% rename from test/bytecode_pypy3.2/06_list_ifnot_and.pyc-notyet rename to test/bytecode_pypy3.2/06_list_ifnot_and.pyc diff --git a/test/bytecode_pypy3.2/06_setif_comprehension.pyc-notyet b/test/bytecode_pypy3.2/06_setif_comprehension.pyc similarity index 100% rename from test/bytecode_pypy3.2/06_setif_comprehension.pyc-notyet rename to test/bytecode_pypy3.2/06_setif_comprehension.pyc diff --git a/test/bytecode_pypy3.2/06_tryifelse.pyc-notyet b/test/bytecode_pypy3.2/06_tryifelse.pyc similarity index 100% rename from test/bytecode_pypy3.2/06_tryifelse.pyc-notyet rename to test/bytecode_pypy3.2/06_tryifelse.pyc diff --git a/test/simple_source/bug27+/05_setattr.py b/test/simple_source/bug27+/05_setattr.py new file mode 100644 index 00000000..6b40862b --- /dev/null +++ b/test/simple_source/bug27+/05_setattr.py @@ -0,0 +1,9 @@ +# Ensure PyPy handling of: +# key, value in slotstate.items(). +# PyPy uses LOOKUP_METHOD and CALL_METHOD instead +# of LOAD_ATTR and CALL_FUNCTION +def bug(state, slotstate): + if state: + if slotstate is not None: + for key, value in slotstate.items(): + setattr(state, key, 2) diff --git a/test/simple_source/bug36/01_fstring.py b/test/simple_source/bug36/01_fstring.py new file mode 100644 index 00000000..d2bf6b29 --- /dev/null +++ b/test/simple_source/bug36/01_fstring.py @@ -0,0 +1,3 @@ +var1 = 'x' +var2 = 'y' +print(f'interpolate {var1} strings {var2} py36') diff --git a/test/simple_source/bug_pypy27/00_assign_pypy.py b/test/simple_source/bug_pypy27/00_assign_pypy.py new file mode 100644 index 00000000..08c22891 --- /dev/null +++ b/test/simple_source/bug_pypy27/00_assign_pypy.py @@ -0,0 +1,4 @@ +# From PyPy 2.6.1 datetime +# PyPy has simpler handling of assign3 and assign2 +i, n = 0, 1 +a, b, c = 1, 2, 3 diff --git a/test/simple_source/bug_pypy27/01_assert2.py b/test/simple_source/bug_pypy27/01_assert2.py new file mode 100644 index 00000000..7a7214e0 --- /dev/null +++ b/test/simple_source/bug_pypy27/01_assert2.py @@ -0,0 +1,17 @@ +# From PyPy argparse, dedent. + +# PyPY adds opcode JUMP_IF_NOT_DEBUG. +# This is the two argument form. +assert __name__ != '__main"', 'Indent decreased below 0.' 
+ +# From PyPy simple_interact.py +# PyPy uses POP_JUMP_IF_FALSE as well as POP_JUMP_IF_TRUE +# CPython only uses POP_JUMP_IF_TRUE +while 1: + try: + more = 10 + except EOFError: + break + more = len(__file__) + assert not more, "FOO" + assert not more diff --git a/test/simple_source/bug_pypy27/03_try_return.py b/test/simple_source/bug_pypy27/03_try_return.py new file mode 100644 index 00000000..f8057ba5 --- /dev/null +++ b/test/simple_source/bug_pypy27/03_try_return.py @@ -0,0 +1,16 @@ +# From PyPy 2.7 argparse.py +# PyPY reduces branches as a result of the return statement +# So we need a new rules for trystmt and try_middle which we +# suffix with _pypy, e.g. trystmt_pypy, and try_middle_pypy +def call(self, string): + try: + return open(string, self, self._bufsize) + except IOError: + pass + +# From PyPy 2.6.1 function.py +def _call_funcptr(self, funcptr, *newargs): + try: + return self._build_result(self._restype_, result) + finally: + funcptr.free_temp_buffers() diff --git a/test/simple_source/stmts/03_if_elif.py b/test/simple_source/stmts/03_if_elif.py index 0520d6ff..798f86b2 100644 --- a/test/simple_source/stmts/03_if_elif.py +++ b/test/simple_source/stmts/03_if_elif.py @@ -2,14 +2,19 @@ # Bug in 2.6 is having multple COME_FROMs due to the # "and" in the "if" clause if __name__: - if __file__ and name: + if __file__ and __name__: pass - elif name: + elif __name__: pass # 2.6.9 transformer.py # Bug in 2.6 is multple COME_FROMs as a result # of the "or" in the "assert" + +# In PyPy the assert is handled via PyPy's unique JUMP_IF_NOT_DEBUG +# instruction. + +# Also note that the "else: pass" is superfluous if __name__: pass elif __file__: diff --git a/test/simple_source/stmts/09_whiletrue_bug.py b/test/simple_source/stmts/09_whiletrue_bug.py index 159c10cf..4d04a16e 100644 --- a/test/simple_source/stmts/09_whiletrue_bug.py +++ b/test/simple_source/stmts/09_whiletrue_bug.py @@ -1,9 +1,12 @@ -if args == ['-']: +if __file__ == ['-']: while True: try: - compile(filename, doraise=True) + compile(__file__, doraise=True) except RuntimeError: rv = 1 else: rv = 1 print(rv) + + +while 1:pass diff --git a/test/test_pyenvlib.py b/test/test_pyenvlib.py index 72ea73ad..b904b0f3 100755 --- a/test/test_pyenvlib.py +++ b/test/test_pyenvlib.py @@ -28,6 +28,7 @@ from fnmatch import fnmatch #----- configure this for your needs TEST_VERSIONS=('2.3.7', '2.4.6', '2.5.6', '2.6.9', 'pypy-2.6.1', + 'pypy-5.0.1', '2.7.10', '2.7.11', '3.2.6', '3.3.5', '3.4.2', '3.5.1') diff --git a/test/test_pythonlib.py b/test/test_pythonlib.py index 39f99ac7..ea98f7e1 100755 --- a/test/test_pythonlib.py +++ b/test/test_pythonlib.py @@ -78,13 +78,13 @@ for vers in (2.7, 3.4, 3.5, 3.6): test_options[key] = (os.path.join(src_dir, pythonlib), PYOC, key, vers) pass -for vers in (2.3, 2.4, 2.5, 2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6): +for vers in (2.3, 2.4, 2.5, 2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6, 'pypy3.2', 'pypy2.7'): bytecode = "bytecode_%s" % vers key = "bytecode-%s" % vers test_options[key] = (bytecode, PYC, bytecode, vers) key = "%s" % vers pythonlib = "python%s" % vers - if vers >= 3.0: + if isinstance(vers, float) and vers >= 3.0: pythonlib = os.path.join(pythonlib, '__pycache__') test_options[key] = (os.path.join(lib_prefix, pythonlib), PYOC, pythonlib, vers) diff --git a/uncompyle6/bin/uncompile.py b/uncompyle6/bin/uncompile.py index dd93f799..7e2a2cbe 100755 --- a/uncompyle6/bin/uncompile.py +++ b/uncompyle6/bin/uncompile.py @@ -65,7 +65,7 @@ def usage(): def main_bin(): if not (sys.version_info[0:2] in ((2, 6), (2, 7), (3, 
2), (3, 3), (3, 4), (3, 5), (3, 6))): - print('Error: %s requires Python 2.6, 2.7, 3.2, 3.3, 3.4 or 3.5' % program, + print('Error: %s requires Python 2.6, 2.7, 3.2, 3.3, 3.4, 3.5, or 3.6' % program, file=sys.stderr) sys.exit(-1) diff --git a/uncompyle6/disas.py b/uncompyle6/disas.py index 97bffbeb..435c89ab 100644 --- a/uncompyle6/disas.py +++ b/uncompyle6/disas.py @@ -28,7 +28,7 @@ from xdis.code import iscode from xdis.load import check_object_path, load_module from uncompyle6.scanner import get_scanner -def disco(version, co, out=None): +def disco(version, co, out=None, is_pypy=False): """ diassembles and deparses a given code block 'co' """ @@ -42,7 +42,7 @@ def disco(version, co, out=None): print('# Embedded file name: %s' % co.co_filename, file=real_out) - scanner = get_scanner(version) + scanner = get_scanner(version, is_pypy=is_pypy) queue = deque([co]) disco_loop(scanner.disassemble, queue, real_out) @@ -61,7 +61,7 @@ def disco_loop(disasm, queue, real_out): queue.append(t.pattr) elif iscode(t.attr): queue.append(t.attr) - print(t.format(), file=real_out) + print(t, file=real_out) pass pass @@ -82,7 +82,7 @@ def disassemble_file(filename, outstream=None, native=False): for con in co: disco(version, con, outstream) else: - disco(version, co, outstream) + disco(version, co, outstream, is_pypy=is_pypy) co = None def _test(): diff --git a/uncompyle6/main.py b/uncompyle6/main.py index 0cffb34e..f7c48e13 100644 --- a/uncompyle6/main.py +++ b/uncompyle6/main.py @@ -12,19 +12,20 @@ from xdis.load import load_module def uncompyle( version, co, out=None, showasm=False, showast=False, timestamp=None, showgrammar=False, code_objects={}, - is_pypy=False): + is_pypy=False, magic_int=None): """ disassembles and deparses a given code block 'co' """ - assert iscode(co) # store final output stream for case of error real_out = out or sys.stdout co_pypy_str = 'PyPy ' if is_pypy else '' run_pypy_str = 'PyPy ' if IS_PYPY else '' - print('# %sPython bytecode %s (disassembled from %sPython %s)\n' % - (co_pypy_str, version, run_pypy_str, PYTHON_VERSION), + print('# %sPython bytecode %s%s disassembled from %sPython %s' % + (co_pypy_str, version, + " (%d)" % magic_int if magic_int else "", + run_pypy_str, PYTHON_VERSION), file=real_out) if co.co_filename: print('# Embedded file name: %s' % co.co_filename, @@ -61,11 +62,11 @@ def uncompyle_file(filename, outstream=None, showasm=False, showast=False, for con in co: uncompyle(version, con, outstream, showasm, showast, timestamp, showgrammar, code_objects=code_objects, - is_pypy=is_pypy) + is_pypy=is_pypy, magic_int=magic_int) else: uncompyle(version, co, outstream, showasm, showast, timestamp, showgrammar, code_objects=code_objects, - is_pypy=is_pypy) + is_pypy=is_pypy, magic_int=magic_int) co = None # FIXME: combine into an options parameter diff --git a/uncompyle6/parser.py b/uncompyle6/parser.py index 8d775ab7..f897e2d5 100644 --- a/uncompyle6/parser.py +++ b/uncompyle6/parser.py @@ -30,6 +30,8 @@ class PythonParser(GenericASTBuilder): def add_unique_rule(self, rule, opname, count, customize): """Add rule to grammar, but only if it hasn't been added previously + opname and count are used in the customize() semantic the actions + to add the semantic action rule. Often, count is not used. 
""" if rule not in self.new_rules: # print("XXX ", rule) # debug @@ -39,6 +41,14 @@ class PythonParser(GenericASTBuilder): pass return + def add_unique_rules(self, rules, customize): + """Add rules to grammar + """ + for rule in rules: + opname = rule.split('::=')[0].strip() + self.add_unique_rule(rule, opname, 0, customize) + return + def cleanup(self): """ Remove recursive references to allow garbage @@ -62,14 +72,16 @@ class PythonParser(GenericASTBuilder): print("Instruction context:") for i in range(start, finish): indent = ' ' if i != index else '-> ' - print("%s%s" % (indent, instructions[i].format())) + print("%s%s" % (indent, instructions[i])) raise ParserError(err_token, err_token.offset) def typestring(self, token): return token.type def nonterminal(self, nt, args): - collect = ('stmts', 'exprlist', 'kvlist', '_stmts', 'print_items', 'kwargs') + collect = ('stmts', 'exprlist', 'kvlist', '_stmts', 'print_items', 'kwargs', + # PYPY: + 'kvlist_n') if nt in collect and len(args) > 1: # @@ -224,17 +236,12 @@ class PythonParser(GenericASTBuilder): stmt ::= augassign2 augassign1 ::= expr expr inplace_op designator augassign1 ::= expr expr inplace_op ROT_THREE STORE_SUBSCR - augassign1 ::= expr expr inplace_op ROT_TWO STORE_SLICE+0 - augassign1 ::= expr expr inplace_op ROT_THREE STORE_SLICE+1 - augassign1 ::= expr expr inplace_op ROT_THREE STORE_SLICE+2 - augassign1 ::= expr expr inplace_op ROT_FOUR STORE_SLICE+3 augassign2 ::= expr DUP_TOP LOAD_ATTR expr inplace_op ROT_TWO STORE_ATTR inplace_op ::= INPLACE_ADD inplace_op ::= INPLACE_SUBTRACT inplace_op ::= INPLACE_MULTIPLY - inplace_op ::= INPLACE_DIVIDE inplace_op ::= INPLACE_TRUE_DIVIDE inplace_op ::= INPLACE_FLOOR_DIVIDE inplace_op ::= INPLACE_MODULO @@ -261,7 +268,9 @@ class PythonParser(GenericASTBuilder): def p_forstmt(self, args): """ _for ::= GET_ITER FOR_ITER - _for ::= LOAD_CONST FOR_LOOP + + # Possibly before Python 2.3 + # _for ::= LOAD_CONST FOR_LOOP for_block ::= l_stmts_opt _come_from JUMP_BACK for_block ::= return_stmts _come_from @@ -327,8 +336,6 @@ class PythonParser(GenericASTBuilder): imports_cont ::= imports_cont import_cont imports_cont ::= import_cont import_cont ::= LOAD_CONST LOAD_CONST import_as_cont - import_as_cont ::= IMPORT_NAME_CONT designator - import_as_cont ::= IMPORT_NAME_CONT load_attrs designator import_as_cont ::= IMPORT_FROM designator load_attrs ::= LOAD_ATTR @@ -359,9 +366,6 @@ class PythonParser(GenericASTBuilder): stmt ::= setcomp_func - setcomp_func ::= BUILD_SET_0 LOAD_FAST FOR_ITER designator comp_iter - JUMP_BACK RETURN_VALUE RETURN_LAST - comp_iter ::= comp_if comp_iter ::= comp_ifnot comp_iter ::= comp_for @@ -369,9 +373,7 @@ class PythonParser(GenericASTBuilder): comp_body ::= set_comp_body comp_body ::= gen_comp_body comp_body ::= dict_comp_body - set_comp_body ::= expr SET_ADD gen_comp_body ::= expr YIELD_VALUE POP_TOP - dict_comp_body ::= expr expr MAP_ADD comp_if ::= expr jmp_false comp_iter comp_ifnot ::= expr jmp_true comp_iter @@ -382,7 +384,6 @@ class PythonParser(GenericASTBuilder): def p_expr(self, args): ''' expr ::= _mklambda - expr ::= SET_LINENO expr ::= LOAD_FAST expr ::= LOAD_NAME expr ::= LOAD_CONST @@ -399,19 +400,17 @@ class PythonParser(GenericASTBuilder): expr ::= unary_expr expr ::= call_function expr ::= unary_not - expr ::= unary_convert expr ::= binary_subscr expr ::= binary_subscr2 expr ::= load_attr expr ::= get_iter - expr ::= slice0 - expr ::= slice1 - expr ::= slice2 - expr ::= slice3 expr ::= buildslice2 expr ::= buildslice3 expr ::= yield + # Possibly 
Python < 2.3 + # expr ::= SET_LINENO + binary_expr ::= expr expr binary_op binary_op ::= BINARY_ADD binary_op ::= BINARY_MULTIPLY @@ -419,7 +418,6 @@ class PythonParser(GenericASTBuilder): binary_op ::= BINARY_OR binary_op ::= BINARY_XOR binary_op ::= BINARY_SUBTRACT - binary_op ::= BINARY_DIVIDE binary_op ::= BINARY_TRUE_DIVIDE binary_op ::= BINARY_FLOOR_DIVIDE binary_op ::= BINARY_MODULO @@ -433,21 +431,11 @@ class PythonParser(GenericASTBuilder): unary_op ::= UNARY_INVERT unary_not ::= expr UNARY_NOT - unary_convert ::= expr UNARY_CONVERT binary_subscr ::= expr expr BINARY_SUBSCR - binary_subscr2 ::= expr expr DUP_TOPX_2 BINARY_SUBSCR load_attr ::= expr LOAD_ATTR get_iter ::= expr GET_ITER - slice0 ::= expr SLICE+0 - slice0 ::= expr DUP_TOP SLICE+0 - slice1 ::= expr expr SLICE+1 - slice1 ::= expr expr DUP_TOPX_2 SLICE+1 - slice2 ::= expr expr SLICE+2 - slice2 ::= expr expr DUP_TOPX_2 SLICE+2 - slice3 ::= expr expr expr SLICE+3 - slice3 ::= expr expr expr DUP_TOPX_3 SLICE+3 buildslice3 ::= expr expr expr BUILD_SLICE_3 buildslice2 ::= expr expr BUILD_SLICE_2 @@ -456,12 +444,6 @@ class PythonParser(GenericASTBuilder): _mklambda ::= load_closure mklambda _mklambda ::= mklambda - # Note: Python < 2.7 doesn't have *POP* or this. Remove from here? - # FIXME: segregate 2.7+ - - or ::= expr JUMP_IF_TRUE_OR_POP expr COME_FROM - and ::= expr JUMP_IF_FALSE_OR_POP expr COME_FROM - or ::= expr jmp_true expr come_from_opt and ::= expr jmp_false expr come_from_opt and2 ::= _jump jmp_false COME_FROM expr COME_FROM @@ -480,14 +462,6 @@ class PythonParser(GenericASTBuilder): ret_expr_or_cond ::= ret_cond ret_expr_or_cond ::= ret_cond_not - # Note: Python < 2.7 doesn't have *POP* or this. Remove from here? - # FIXME: segregate 2.7+ - - ret_and ::= expr JUMP_IF_FALSE_OR_POP ret_expr_or_cond COME_FROM - ret_or ::= expr JUMP_IF_TRUE_OR_POP ret_expr_or_cond COME_FROM - ret_cond ::= expr POP_JUMP_IF_FALSE expr RETURN_END_IF ret_expr_or_cond - ret_cond_not ::= expr POP_JUMP_IF_TRUE expr RETURN_END_IF ret_expr_or_cond - stmt ::= return_lambda stmt ::= conditional_lambda @@ -499,15 +473,9 @@ class PythonParser(GenericASTBuilder): compare ::= expr expr COMPARE_OP cmp_list ::= expr cmp_list1 ROT_TWO POP_TOP _come_from - cmp_list1 ::= expr DUP_TOP ROT_THREE - COMPARE_OP JUMP_IF_FALSE_OR_POP - cmp_list1 COME_FROM cmp_list1 ::= expr DUP_TOP ROT_THREE COMPARE_OP jmp_false cmp_list1 _come_from - cmp_list1 ::= expr DUP_TOP ROT_THREE - COMPARE_OP JUMP_IF_FALSE_OR_POP - cmp_list2 COME_FROM cmp_list1 ::= expr DUP_TOP ROT_THREE COMPARE_OP jmp_false cmp_list2 _come_from @@ -552,10 +520,6 @@ class PythonParser(GenericASTBuilder): designator ::= STORE_GLOBAL designator ::= STORE_DEREF designator ::= expr STORE_ATTR - designator ::= expr STORE_SLICE+0 - designator ::= expr expr STORE_SLICE+1 - designator ::= expr expr STORE_SLICE+2 - designator ::= expr expr expr STORE_SLICE+3 designator ::= store_subscr store_subscr ::= expr expr STORE_SUBSCR designator ::= unpack @@ -571,7 +535,7 @@ def parse(p, tokens, customize): def get_python_parser( - version, debug_parser, compile_mode='exec', + version, debug_parser={}, compile_mode='exec', is_pypy = False): """Returns parser object for Python version 2 or 3, 3.2, 3.5on, etc., depending on the parameters passed. 
*compile_mode* is either @@ -639,16 +603,12 @@ def get_python_parser( p = parse34.Python34Parser(debug_parser) else: p = parse34.Python34ParserSingle(debug_parser) - elif version >= 3.5: + elif version == 3.5: + import uncompyle6.parsers.parse35 as parse35 if compile_mode == 'exec': - p = parse3.Python35onParser(debug_parser) + p = parse35.Python35Parser(debug_parser) else: - p = parse3.Python35onParserSingle(debug_parser) - elif version >= 3.6: - if compile_mode == 'exec': - p = parse3.Python36Parser(debug_parser) - else: - p = parse3.Python36ParserSingle(debug_parser) + p = parse35.Python35ParserSingle(debug_parser) else: if compile_mode == 'exec': p = parse3.Python3Parser(debug_parser) diff --git a/uncompyle6/parsers/parse2.py b/uncompyle6/parsers/parse2.py index 9e02cf34..b3899a23 100644 --- a/uncompyle6/parsers/parse2.py +++ b/uncompyle6/parsers/parse2.py @@ -40,6 +40,15 @@ class Python2Parser(PythonParser): print_nl ::= PRINT_NEWLINE ''' + def p_stmt2(self, args): + """ + while1stmt ::= SETUP_LOOP l_stmts_opt JUMP_BACK POP_BLOCK COME_FROM + + exec_stmt ::= expr exprlist DUP_TOP EXEC_STMT + exec_stmt ::= expr exprlist EXEC_STMT + + """ + def p_print_to(self, args): ''' stmt ::= print_to @@ -84,8 +93,6 @@ class Python2Parser(PythonParser): raise_stmt3 ::= expr expr expr RAISE_VARARGS_3 stmt ::= exec_stmt - exec_stmt ::= expr exprlist DUP_TOP EXEC_STMT - exec_stmt ::= expr exprlist EXEC_STMT stmt ::= assert stmt ::= assert2 @@ -202,18 +209,24 @@ class Python2Parser(PythonParser): genexpr ::= LOAD_GENEXPR MAKE_FUNCTION_0 expr GET_ITER CALL_FUNCTION_1 ''' - def p_import2(self, args): - ''' - # These might be relevant for only Python 2.0 or so. - # Not relevant for Python 3. - importstar ::= LOAD_CONST LOAD_CONST IMPORT_NAME_CONT IMPORT_STAR - importfrom ::= LOAD_CONST LOAD_CONST IMPORT_NAME_CONT importlist2 POP_TOP - ''' + # def p_import2(self, args): + # ''' + # # These might be relevant for only Python 2.0 or so. 
+ # importstar ::= LOAD_CONST LOAD_CONST IMPORT_NAME_CONT IMPORT_STAR + # importfrom ::= LOAD_CONST LOAD_CONST IMPORT_NAME_CONT importlist2 POP_TOP + # import_as_cont ::= IMPORT_NAME_CONT designator + # import_as_cont ::= IMPORT_NAME_CONT load_attrs designator + # ''' def p_expr2(self, args): - ''' + """ expr ::= LOAD_LOCALS + expr ::= slice0 + expr ::= slice1 + expr ::= slice2 + expr ::= slice3 + expr ::= unary_convert slice0 ::= expr SLICE+0 slice0 ::= expr DUP_TOP SLICE+0 @@ -223,14 +236,42 @@ class Python2Parser(PythonParser): slice2 ::= expr expr DUP_TOPX_2 SLICE+2 slice3 ::= expr expr expr SLICE+3 slice3 ::= expr expr expr DUP_TOPX_3 SLICE+3 + unary_convert ::= expr UNARY_CONVERT # In Python 3, DUP_TOPX_2 is DUP_TOP_TWO binary_subscr2 ::= expr expr DUP_TOPX_2 BINARY_SUBSCR - ''' + """ + + def p_slice2(self, args): + """ + designator ::= expr STORE_SLICE+0 + designator ::= expr expr STORE_SLICE+1 + designator ::= expr expr STORE_SLICE+2 + designator ::= expr expr expr STORE_SLICE+3 + augassign1 ::= expr expr inplace_op ROT_TWO STORE_SLICE+0 + augassign1 ::= expr expr inplace_op ROT_THREE STORE_SLICE+1 + augassign1 ::= expr expr inplace_op ROT_THREE STORE_SLICE+2 + augassign1 ::= expr expr inplace_op ROT_FOUR STORE_SLICE+3 + slice0 ::= expr SLICE+0 + slice0 ::= expr DUP_TOP SLICE+0 + slice1 ::= expr expr SLICE+1 + slice1 ::= expr expr DUP_TOPX_2 SLICE+1 + slice2 ::= expr expr SLICE+2 + slice2 ::= expr expr DUP_TOPX_2 SLICE+2 + slice3 ::= expr expr expr SLICE+3 + slice3 ::= expr expr expr DUP_TOPX_3 SLICE+3 + """ + + def p_op2(self, args): + """ + inplace_op ::= INPLACE_DIVIDE + binary_op ::= BINARY_DIVIDE + binary_subscr2 ::= expr expr DUP_TOPX_2 BINARY_SUBSCR + """ def add_custom_rules(self, tokens, customize): ''' - Special handling for opcodes that take a variable number + Special handling for opcodes such as those that take a variable number of arguments -- we add a new rule for each: build_list ::= {expr}^n BUILD_LIST_n @@ -246,62 +287,137 @@ class Python2Parser(PythonParser): expr ::= expr {expr}^n CALL_FUNCTION_VAR_n POP_TOP expr ::= expr {expr}^n CALL_FUNCTION_VAR_KW_n POP_TOP expr ::= expr {expr}^n CALL_FUNCTION_KW_n POP_TOP + + PyPy adds custom rules here as well ''' - for k, v in list(customize.items()): - op = k[:k.rfind('_')] - if op in ('BUILD_LIST', 'BUILD_TUPLE', 'BUILD_SET'): + for opname, v in list(customize.items()): + opname_base = opname[:opname.rfind('_')] + if opname == 'PyPy': + self.addRule(""" + stmt ::= assign3_pypy + stmt ::= assign2_pypy + assign3_pypy ::= expr expr expr designator designator designator + assign2_pypy ::= expr expr designator designator + list_compr ::= expr BUILD_LIST_FROM_ARG _for designator list_iter + JUMP_BACK + """, nop_func) + continue + elif opname_base in ('BUILD_LIST', 'BUILD_TUPLE', 'BUILD_SET'): thousands = (v//1024) thirty32s = ((v//32)%32) if thirty32s > 0: rule = "expr32 ::=%s" % (' expr' * 32) - self.add_unique_rule(rule, op, v, customize) + self.add_unique_rule(rule, opname_base, v, customize) self.seen32 = True if thousands > 0: self.add_unique_rule("expr1024 ::=%s" % (' expr32' * 32), - op, v, customize) + opname_base, v, customize) self.seen1024 = True rule = ('build_list ::= ' + 'expr1024 '*thousands + - 'expr32 '*thirty32s + 'expr '*(v%32) + k) - elif op == 'BUILD_MAP': - kvlist_n = "kvlist_%s" % v - rule = kvlist_n + ' ::= ' + ' kv3' * v - self.add_unique_rule(rule, op, v, customize) - rule = "mapexpr ::= %s %s" % (k, kvlist_n) - self.add_unique_rule(rule, op, v, customize) - elif op in ('UNPACK_TUPLE', 
'UNPACK_SEQUENCE'): - rule = 'unpack ::= ' + k + ' designator'*v - elif op == 'UNPACK_LIST': - rule = 'unpack_list ::= ' + k + ' designator'*v - elif op in ('DUP_TOPX', 'RAISE_VARARGS'): - # no need to add a rule + 'expr32 '*thirty32s + 'expr '*(v%32) + opname) + elif opname == 'LOOKUP_METHOD': + # A PyPy speciality - DRY with parse3 + self.add_unique_rule("load_attr ::= expr LOOKUP_METHOD", + opname, v, customize) continue - # rule = 'dup_topx ::= ' + 'expr '*v + k - elif op == 'MAKE_FUNCTION': + elif opname == 'JUMP_IF_NOT_DEBUG': + self.add_unique_rules([ + 'jmp_true_false ::= POP_JUMP_IF_TRUE', + 'jmp_true_false ::= POP_JUMP_IF_FALSE', + "stmt ::= assert_pypy", + "stmt ::= assert2_pypy", + "assert_pypy ::= JUMP_IF_NOT_DEBUG assert_expr jmp_true_false " + "LOAD_ASSERT RAISE_VARARGS_1 COME_FROM", + "assert2_pypy ::= JUMP_IF_NOT_DEBUG assert_expr jmp_true_false " + "LOAD_ASSERT expr CALL_FUNCTION_1 RAISE_VARARGS_1 COME_FROM", + ], customize) + continue + elif opname_base == 'BUILD_MAP': + if opname == 'BUILD_MAP_n': + # PyPy sometimes has no count. Sigh. + self.add_unique_rules([ + 'dictcomp_func ::= BUILD_MAP_n LOAD_FAST FOR_ITER designator ' + 'comp_iter JUMP_BACK RETURN_VALUE RETURN_LAST', + 'kvlist_n ::= kvlist_n kv3', + 'kvlist_n ::=', + 'mapexpr ::= BUILD_MAP_n kvlist_n', + ], customize) + else: + kvlist_n = "kvlist_%s" % v + self.add_unique_rules([ + (kvlist_n + " ::=" + ' kv3' * v), + "mapexpr ::= %s %s" % (opname, kvlist_n) + ], customize) + continue + elif opname == 'SETUP_EXCEPT': + # FIXME: have a way here to detect PyPy. Right now we + # only have SETUP_EXCEPT customization for PyPy, but that might not + # always be the case. + self.add_unique_rules([ + "stmt ::= trystmt_pypy", + "trystmt_pypy ::= SETUP_EXCEPT suite_stmts_opt try_middle_pypy", + "try_middle_pypy ::= COME_FROM except_stmts END_FINALLY COME_FROM" + ], customize) + continue + elif opname == 'SETUP_FINALLY': + # FIXME: have a way here to detect PyPy. Right now we + # only have SETUP_EXCEPT customization for PyPy, but that might not + # always be the case. + self.add_unique_rules([ + "stmt ::= tryfinallystmt_pypy", + "tryfinallystmt_pypy ::= SETUP_FINALLY suite_stmts_opt COME_FROM " + "suite_stmts_opt END_FINALLY" + ], customize) + continue + elif opname_base in ('UNPACK_TUPLE', 'UNPACK_SEQUENCE'): + rule = 'unpack ::= ' + opname + ' designator'*v + elif opname_base == 'UNPACK_LIST': + rule = 'unpack_list ::= ' + opname + ' designator'*v + elif opname_base in ('DUP_TOPX', 'RAISE_VARARGS'): + # FIXME: remove these conditions if they are not needed. 
+ # no longer need to add a rule + continue + elif opname_base == 'MAKE_FUNCTION': self.addRule('mklambda ::= %s LOAD_LAMBDA %s' % - ('pos_arg '*v, k), nop_func) - rule = 'mkfunc ::= %s LOAD_CONST %s' % ('expr '*v, k) - elif op == 'MAKE_CLOSURE': - self.addRule('mklambda ::= %s load_closure LOAD_LAMBDA %s' % - ('expr '*v, k), nop_func) - self.addRule('genexpr ::= %s load_closure LOAD_GENEXPR %s expr GET_ITER CALL_FUNCTION_1' % - ('expr '*v, k), nop_func) - self.addRule('setcomp ::= %s load_closure LOAD_SETCOMP %s expr GET_ITER CALL_FUNCTION_1' % - ('expr '*v, k), nop_func) - self.addRule('dictcomp ::= %s load_closure LOAD_DICTCOMP %s expr GET_ITER CALL_FUNCTION_1' % - ('expr '*v, k), nop_func) - rule = 'mkfunc ::= %s load_closure LOAD_CONST %s' % ('expr '*v, k) - # rule = 'mkfunc ::= %s closure_list LOAD_CONST %s' % ('expr '*v, k) - elif op in ('CALL_FUNCTION', 'CALL_FUNCTION_VAR', - 'CALL_FUNCTION_VAR_KW', 'CALL_FUNCTION_KW'): - na = (v & 0xff) # positional parameters - nk = (v >> 8) & 0xff # keyword parameters + ('pos_arg '*v, opname), nop_func) + rule = 'mkfunc ::= %s LOAD_CONST %s' % ('expr '*v, opname) + elif opname_base == 'MAKE_CLOSURE': + # FIXME: use add_unique_rules to tidy this up. + self.add_unique_rules([ + ('mklambda ::= %s load_closure LOAD_LAMBDA %s' % + ('expr '*v, opname)), + ('genexpr ::= %s load_closure LOAD_GENEXPR %s expr' + ' GET_ITER CALL_FUNCTION_1' % + ('expr '*v, opname)), + ('setcomp ::= %s load_closure LOAD_SETCOMP %s expr' + ' GET_ITER CALL_FUNCTION_1' % + ('expr '*v, opname)), + ('dictcomp ::= %s load_closure LOAD_DICTCOMP %s expr' + ' GET_ITER CALL_FUNCTION_1' % + ('expr '*v, opname)), + ('mkfunc ::= %s load_closure LOAD_CONST %s' % + ('expr '*v, opname))], + customize) + continue + elif opname_base in ('CALL_FUNCTION', 'CALL_FUNCTION_VAR', + 'CALL_FUNCTION_VAR_KW', 'CALL_FUNCTION_KW'): + args_pos = (v & 0xff) # positional parameters + args_kw = (v >> 8) & 0xff # keyword parameters # number of apply equiv arguments: - nak = ( len(op)-len('CALL_FUNCTION') ) // 3 - rule = 'call_function ::= expr ' + 'expr '*na + 'kwarg '*nk \ - + 'expr ' * nak + k + nak = ( len(opname_base)-len('CALL_FUNCTION') ) // 3 + rule = 'call_function ::= expr ' + 'expr '*args_pos + 'kwarg '*args_kw \ + + 'expr ' * nak + opname + elif opname_base == 'CALL_METHOD': + # PyPy only - DRY with parse3 + args_pos = (v & 0xff) # positional parameters + args_kw = (v >> 8) & 0xff # keyword parameters + # number of apply equiv arguments: + nak = ( len(opname_base)-len('CALL_METHOD') ) // 3 + rule = 'call_function ::= expr ' + 'expr '*args_pos + 'kwarg '*args_kw \ + + 'expr ' * nak + opname else: - raise Exception('unknown customize token %s' % k) - self.add_unique_rule(rule, op, v, customize) + raise Exception('unknown customize token %s' % opname) + self.add_unique_rule(rule, opname_base, v, customize) class Python2ParserSingle(Python2Parser, PythonParserSingle): pass diff --git a/uncompyle6/parsers/parse23.py b/uncompyle6/parsers/parse23.py index d3a3ba1d..f39f7954 100644 --- a/uncompyle6/parsers/parse23.py +++ b/uncompyle6/parsers/parse23.py @@ -14,9 +14,11 @@ class Python23Parser(Python24Parser): def p_misc23(self, args): ''' - _while1test ::= JUMP_FORWARD JUMP_IF_FALSE POP_TOP COME_FROM + # Used to keep semantic positions the same across later versions + # of Python + _while1test ::= SETUP_LOOP JUMP_FORWARD JUMP_IF_FALSE POP_TOP COME_FROM - while1stmt ::= SETUP_LOOP _while1test l_stmts JUMP_BACK + while1stmt ::= _while1test l_stmts_opt JUMP_BACK COME_FROM POP_TOP POP_BLOCK COME_FROM 
list_compr ::= BUILD_LIST_0 DUP_TOP LOAD_ATTR designator list_iter del_stmt diff --git a/uncompyle6/parsers/parse24.py b/uncompyle6/parsers/parse24.py index 579fe4ce..92a798a2 100644 --- a/uncompyle6/parsers/parse24.py +++ b/uncompyle6/parsers/parse24.py @@ -26,6 +26,7 @@ class Python24Parser(Python25Parser): # Python 2.5+ omits POP_TOP POP_BLOCK while1stmt ::= SETUP_LOOP l_stmts JUMP_BACK POP_TOP POP_BLOCK COME_FROM + while1stmt ::= SETUP_LOOP l_stmts_opt JUMP_BACK POP_TOP POP_BLOCK COME_FROM # Python 2.5+: # call_stmt ::= expr POP_TOP diff --git a/uncompyle6/parsers/parse25.py b/uncompyle6/parsers/parse25.py index 29eeefe7..5a162f15 100644 --- a/uncompyle6/parsers/parse25.py +++ b/uncompyle6/parsers/parse25.py @@ -14,7 +14,6 @@ class Python25Parser(Python26Parser): def p_misc25(self, args): ''' - # If "return_if_stmt" is in a loop, a JUMP_BACK can be emitted. In 2.6 the # JUMP_BACK doesn't appear diff --git a/uncompyle6/parsers/parse26.py b/uncompyle6/parsers/parse26.py index 25437282..5f3b4b1a 100644 --- a/uncompyle6/parsers/parse26.py +++ b/uncompyle6/parsers/parse26.py @@ -157,6 +157,8 @@ class Python26Parser(Python2Parser): iflaststmtl ::= testexpr c_stmts_opt JUMP_BACK come_from_pop iflaststmt ::= testexpr c_stmts_opt JUMP_ABSOLUTE come_from_pop + while1stmt ::= SETUP_LOOP l_stmts_opt JUMP_BACK COME_FROM + # Common with 2.7 while1stmt ::= SETUP_LOOP return_stmts bp_come_from while1stmt ::= SETUP_LOOP return_stmts COME_FROM @@ -201,7 +203,6 @@ class Python26Parser(Python2Parser): ret_cond_not ::= expr jmp_true expr RETURN_END_IF come_from_pop ret_expr_or_cond # FIXME: split into Python 2.5 - ret_cond ::= expr jmp_false expr JUMP_RETURN come_from_pop ret_expr_or_cond ret_or ::= expr jmp_true ret_expr_or_cond come_froms ''' @@ -224,3 +225,20 @@ if __name__ == '__main__': # Check grammar p = Python26Parser() p.checkGrammar() + from uncompyle6 import PYTHON_VERSION, IS_PYPY + if PYTHON_VERSION == 2.6: + lhs, rhs, tokens, right_recursive = p.checkSets() + from uncompyle6.scanner import get_scanner + s = get_scanner(PYTHON_VERSION, IS_PYPY) + opcode_set = set(s.opc.opname).union(set( + """JUMP_BACK CONTINUE RETURN_END_IF COME_FROM + LOAD_GENEXPR LOAD_ASSERT LOAD_SETCOMP LOAD_DICTCOMP + LAMBDA_MARKER RETURN_LAST + """.split())) + remain_tokens = set(tokens) - opcode_set + import re + remain_tokens = set([re.sub('_\d+$','', t) for t in remain_tokens]) + remain_tokens = set([re.sub('_CONT$','', t) for t in remain_tokens]) + remain_tokens = set(remain_tokens) - opcode_set + print(remain_tokens) + # print(sorted(p.rule2name.items())) diff --git a/uncompyle6/parsers/parse27.py b/uncompyle6/parsers/parse27.py index aeede81e..b1330a6d 100644 --- a/uncompyle6/parsers/parse27.py +++ b/uncompyle6/parsers/parse27.py @@ -12,11 +12,17 @@ class Python27Parser(Python2Parser): super(Python27Parser, self).__init__(debug_parser) self.customized = {} - def p_list_comprehension27(self, args): + def p_comprehension27(self, args): """ list_for ::= expr _for designator list_iter JUMP_BACK - list_compr ::= expr BUILD_LIST_FROM_ARG _for designator list_iter JUMP_BACK + setcomp_func ::= BUILD_SET_0 LOAD_FAST FOR_ITER designator comp_iter + JUMP_BACK RETURN_VALUE RETURN_LAST + + dict_comp_body ::= expr expr MAP_ADD + set_comp_body ::= expr SET_ADD + + # See also common Python p_list_comprehension """ def p_try27(self, args): @@ -39,15 +45,34 @@ class Python27Parser(Python2Parser): def p_jump27(self, args): """ _ifstmts_jump ::= c_stmts_opt JUMP_FORWARD COME_FROM + bp_come_from ::= POP_BLOCK COME_FROM + + # FIXME: 
Common with 3.0+ jmp_false ::= POP_JUMP_IF_FALSE jmp_true ::= POP_JUMP_IF_TRUE - bp_come_from ::= POP_BLOCK COME_FROM - """ + ret_and ::= expr JUMP_IF_FALSE_OR_POP ret_expr_or_cond COME_FROM + ret_or ::= expr JUMP_IF_TRUE_OR_POP ret_expr_or_cond COME_FROM + ret_cond ::= expr POP_JUMP_IF_FALSE expr RETURN_END_IF ret_expr_or_cond + ret_cond_not ::= expr POP_JUMP_IF_TRUE expr RETURN_END_IF ret_expr_or_cond + + or ::= expr JUMP_IF_TRUE_OR_POP expr COME_FROM + and ::= expr JUMP_IF_FALSE_OR_POP expr COME_FROM + + cmp_list1 ::= expr DUP_TOP ROT_THREE + COMPARE_OP JUMP_IF_FALSE_OR_POP + cmp_list1 COME_FROM + cmp_list1 ::= expr DUP_TOP ROT_THREE + COMPARE_OP JUMP_IF_FALSE_OR_POP + cmp_list2 COME_FROM + """ def p_stmt27(self, args): """ + # assert condition assert ::= assert_expr jmp_true LOAD_ASSERT RAISE_VARARGS_1 + + # assert condition, expr assert2 ::= assert_expr jmp_true LOAD_ASSERT expr RAISE_VARARGS_2 withstmt ::= expr SETUP_WITH POP_TOP suite_stmts_opt @@ -58,12 +83,13 @@ class Python27Parser(Python2Parser): POP_BLOCK LOAD_CONST COME_FROM WITH_CLEANUP END_FINALLY + while1stmt ::= SETUP_LOOP l_stmts_opt JUMP_BACK POP_BLOCK COME_FROM + # Common with 2.6 while1stmt ::= SETUP_LOOP return_stmts bp_come_from while1stmt ::= SETUP_LOOP return_stmts COME_FROM """ - class Python27ParserSingle(Python27Parser, PythonParserSingle): pass @@ -71,3 +97,20 @@ if __name__ == '__main__': # Check grammar p = Python27Parser() p.checkGrammar() + from uncompyle6 import PYTHON_VERSION, IS_PYPY + if PYTHON_VERSION == 2.7: + lhs, rhs, tokens, right_recursive = p.checkSets() + from uncompyle6.scanner import get_scanner + s = get_scanner(PYTHON_VERSION, IS_PYPY) + opcode_set = set(s.opc.opname).union(set( + """JUMP_BACK CONTINUE RETURN_END_IF COME_FROM + LOAD_GENEXPR LOAD_ASSERT LOAD_SETCOMP LOAD_DICTCOMP + LAMBDA_MARKER RETURN_LAST + """.split())) + remain_tokens = set(tokens) - opcode_set + import re + remain_tokens = set([re.sub('_\d+$','', t) for t in remain_tokens]) + remain_tokens = set([re.sub('_CONT$','', t) for t in remain_tokens]) + remain_tokens = set(remain_tokens) - opcode_set + print(remain_tokens) + # p.dumpGrammar() diff --git a/uncompyle6/parsers/parse3.py b/uncompyle6/parsers/parse3.py index 7338edf8..7d70e9d8 100644 --- a/uncompyle6/parsers/parse3.py +++ b/uncompyle6/parsers/parse3.py @@ -17,22 +17,18 @@ that a later phase can turn into a sequence of ASCII text. from __future__ import print_function -from uncompyle6.parser import PythonParser, PythonParserSingle +from uncompyle6.parser import PythonParser, PythonParserSingle, nop_func from uncompyle6.parsers.astnode import AST from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG -from uncompyle6 import PYTHON3 class Python3Parser(PythonParser): def __init__(self, debug_parser=PARSER_DEFAULT_DEBUG): self.added_rules = set() - if PYTHON3: - super().__init__(AST, 'stmts', debug=debug_parser) - else: - super(Python3Parser, self).__init__(AST, 'stmts', debug=debug_parser) + super(Python3Parser, self).__init__(AST, 'stmts', debug=debug_parser) self.new_rules = set() - def p_list_comprehension3(self, args): + def p_comprehension3(self, args): """ # Python3 scanner adds LOAD_LISTCOMP. Python3 does list comprehension like # other comprehensions (set, dictionary). 
@@ -51,6 +47,11 @@ class Python3Parser(PythonParser): jb_or_c ::= JUMP_BACK jb_or_c ::= CONTINUE + setcomp_func ::= BUILD_SET_0 LOAD_FAST FOR_ITER designator comp_iter + JUMP_BACK RETURN_VALUE RETURN_LAST + dict_comp_body ::= expr expr MAP_ADD + set_comp_body ::= expr SET_ADD + # See also common Python p_list_comprehension """ @@ -89,10 +90,6 @@ class Python3Parser(PythonParser): raise_stmt2 ::= expr expr RAISE_VARARGS_2 raise_stmt3 ::= expr expr expr RAISE_VARARGS_3 - stmt ::= exec_stmt - exec_stmt ::= expr exprlist DUP_TOP EXEC_STMT - exec_stmt ::= expr exprlist EXEC_STMT - stmt ::= assert stmt ::= assert2 stmt ::= ifstmt @@ -114,10 +111,6 @@ class Python3Parser(PythonParser): del_stmt ::= DELETE_FAST del_stmt ::= DELETE_NAME del_stmt ::= DELETE_GLOBAL - del_stmt ::= expr DELETE_SLICE+0 - del_stmt ::= expr expr DELETE_SLICE+1 - del_stmt ::= expr expr DELETE_SLICE+2 - del_stmt ::= expr expr expr DELETE_SLICE+3 del_stmt ::= delete_subscr delete_subscr ::= expr expr DELETE_SUBSCR del_stmt ::= expr DELETE_ATTR @@ -250,6 +243,10 @@ class Python3Parser(PythonParser): def p_misc3(self, args): """ try_middle ::= JUMP_FORWARD COME_FROM except_stmts END_FINALLY NOP COME_FROM + for_block ::= l_stmts + iflaststmtl ::= testexpr c_stmts_opt + iflaststmt ::= testexpr c_stmts_opt34 + c_stmts_opt34 ::= JUMP_BACK JUMP_ABSOLUTE c_stmts_opt """ def p_jump3(self, args): @@ -258,6 +255,22 @@ class Python3Parser(PythonParser): come_froms ::= COME_FROM jmp_false ::= POP_JUMP_IF_FALSE jmp_true ::= POP_JUMP_IF_TRUE + + # FIXME: Common with 2.7 + ret_and ::= expr JUMP_IF_FALSE_OR_POP ret_expr_or_cond COME_FROM + ret_or ::= expr JUMP_IF_TRUE_OR_POP ret_expr_or_cond COME_FROM + ret_cond ::= expr POP_JUMP_IF_FALSE expr RETURN_END_IF ret_expr_or_cond + ret_cond_not ::= expr POP_JUMP_IF_TRUE expr RETURN_END_IF ret_expr_or_cond + + or ::= expr JUMP_IF_TRUE_OR_POP expr COME_FROM + and ::= expr JUMP_IF_FALSE_OR_POP expr COME_FROM + + cmp_list1 ::= expr DUP_TOP ROT_THREE + COMPARE_OP JUMP_IF_FALSE_OR_POP + cmp_list1 COME_FROM + cmp_list1 ::= expr DUP_TOP ROT_THREE + COMPARE_OP JUMP_IF_FALSE_OR_POP + cmp_list2 COME_FROM """ def p_stmt3(self, args): @@ -298,14 +311,6 @@ class Python3Parser(PythonParser): binary_subscr2 ::= expr expr DUP_TOP_TWO BINARY_SUBSCR ''' - def p_misc3(self, args): - ''' - for_block ::= l_stmts - iflaststmtl ::= testexpr c_stmts_opt - iflaststmt ::= testexpr c_stmts_opt34 - c_stmts_opt34 ::= JUMP_BACK JUMP_ABSOLUTE c_stmts_opt - ''' - @staticmethod def call_fn_name(token): """Customize CALL_FUNCTION to add the number of positional arguments""" @@ -360,7 +365,7 @@ class Python3Parser(PythonParser): call_function ::= expr {expr}^n CALL_FUNCTION_KW_n POP_TOP classdefdeco2 ::= LOAD_BUILD_CLASS mkfunc {expr}^n-1 CALL_FUNCTION_n - """ + """ # Low byte indicates number of positional paramters, # high byte number of positional parameters args_pos = token.attr & 0xff @@ -385,7 +390,7 @@ class Python3Parser(PythonParser): def add_custom_rules(self, tokens, customize): """ - Special handling for opcodes that take a variable number + Special handling for opcodes such as those that take a variable number of arguments -- we add a new rule for each: unpack_list ::= UNPACK_LIST_n {expr}^n @@ -436,31 +441,41 @@ class Python3Parser(PythonParser): mkfunc ::= {pos_arg}^n [LOAD_CONST] MAKE_FUNCTION_n mklambda ::= {pos_arg}^n LOAD_LAMBDA [LOAD_CONST] MAKE_FUNCTION_n + For PYPY: + load_attr ::= expr LOOKUP_METHOD + call_function ::= expr CALL_METHOD """ + saw_format_value = False for i, token in enumerate(tokens): opname = 
token.type opname_base = opname[:opname.rfind('_')] - if opname in ('CALL_FUNCTION', 'CALL_FUNCTION_VAR', - 'CALL_FUNCTION_VAR_KW', 'CALL_FUNCTION_KW'): + if opname == 'PyPy': + self.addRule(""" + stmt ::= assign3_pypy + stmt ::= assign2_pypy + assign3_pypy ::= expr expr expr designator designator designator + assign2_pypy ::= expr expr designator designator + """, nop_func) + continue + elif opname == 'FORMAT_VALUE': + # Python 3.6+ + self.addRule(""" + formatted_value ::= LOAD_FAST FORMAT_VALUE + formatted_value ::= LOAD_NAME FORMAT_VALUE + str ::= LOAD_CONST + formatted_value_or_str ::= formatted_value + formatted_value_or_str ::= str + """, nop_func) + saw_format_value = True + + elif opname in ('CALL_FUNCTION', 'CALL_FUNCTION_VAR', + 'CALL_FUNCTION_VAR_KW', 'CALL_FUNCTION_KW'): self.custom_classfunc_rule(opname, token, customize) elif opname == 'LOAD_DICTCOMP': rule_pat = ("dictcomp ::= LOAD_DICTCOMP %sMAKE_FUNCTION_0 expr " "GET_ITER CALL_FUNCTION_1") self.add_make_function_rule(rule_pat, opname, token.attr, customize) - ## Custom rules which are handled now by the more generic rule in - ## either MAKE_FUNCTION or MAKE_CLOSURE - # elif opname == 'LOAD_GENEXPR': - # rule_pat = ("genexpr ::= LOAD_GENEXPR %sMAKE_FUNCTION_0 expr " - # "GET_ITER CALL_FUNCTION_1") - # self.add_make_function_rule(rule_pat, opname, token.attr, customize) - # rule_pat = ("genexpr ::= load_closure LOAD_GENEXPR %sMAKE_CLOSURE_0 expr " - # "GET_ITER CALL_FUNCTION_1") - # self.add_make_function_rule(rule_pat, opname, token.attr, customize) - # elif opname == 'LOAD_LISTCOMP': - # rule_pat = ("listcomp ::= LOAD_LISTCOMP %sMAKE_FUNCTION_0 expr " - # "GET_ITER CALL_FUNCTION_1") - # self.add_make_function_rule(rule_pat, opname, token.attr, customize) elif opname == 'LOAD_SETCOMP': # Should this be generalized and put under MAKE_FUNCTION? rule_pat = ("setcomp ::= LOAD_SETCOMP %sMAKE_FUNCTION_0 expr " @@ -476,9 +491,49 @@ class Python3Parser(PythonParser): if opname_base == 'BUILD_TUPLE': rule = ('load_closure ::= %s%s' % (('LOAD_CLOSURE ' * v), opname)) self.add_unique_rule(rule, opname, token.attr, customize) + if opname_base == 'BUILD_LIST' and saw_format_value: + format_or_str_n = "formatted_value_or_str_%s" % v + self.addRule(""" + expr ::= joined_str + joined_str ::= LOAD_CONST LOAD_ATTR %s CALL_FUNCTION_1 + %s ::= %s%s + """ % (format_or_str_n, format_or_str_n, ("formatted_value_or_str " *v), opname), + nop_func) + + elif opname == 'LOOKUP_METHOD': + # A PyPy speciality - DRY with parse2 + self.add_unique_rule("load_attr ::= expr LOOKUP_METHOD", + opname, token.attr, customize) + continue + elif opname == 'JUMP_IF_NOT_DEBUG': + self.add_unique_rule( + "stmt ::= assert_pypy", opname, v, customize) + self.add_unique_rule( + "stmt ::= assert2_pypy", opname_base, v, customize) + self.add_unique_rule( + "assert_pypy ::= JUMP_IF_NOT_DEBUG assert_expr jmp_true " + "LOAD_ASSERT RAISE_VARARGS_1 COME_FROM", + opname, token.attr, customize) + self.add_unique_rule( + "assert2_pypy ::= JUMP_IF_NOT_DEBUG assert_expr jmp_true " + "LOAD_ASSERT expr CALL_FUNCTION_1 RAISE_VARARGS_1 COME_FROM", + opname_base, v, customize) + continue elif opname_base == 'BUILD_MAP': kvlist_n = "kvlist_%s" % token.attr - if self.version >= 3.5: + if opname == 'BUILD_MAP_n': + # PyPy sometimes has no count. Sigh. 
+ rule = ('dictcomp_func ::= BUILD_MAP_n LOAD_FAST FOR_ITER designator ' + 'comp_iter JUMP_BACK RETURN_VALUE RETURN_LAST') + self.add_unique_rule(rule, 'dictomp_func', 1, customize) + + kvlist_n = 'kvlist_n' + rule = 'kvlist_n ::= kvlist_n kv3' + self.add_unique_rule(rule, 'kvlist_n', 0, customize) + rule = 'kvlist_n ::=' + self.add_unique_rule(rule, 'kvlist_n', 1, customize) + rule = "mapexpr ::= BUILD_MAP_n kvlist_n" + elif self.version >= 3.5: rule = kvlist_n + ' ::= ' + 'expr ' * (token.attr*2) self.add_unique_rule(rule, opname, token.attr, customize) rule = "mapexpr ::= %s %s" % (kvlist_n, opname) @@ -523,6 +578,17 @@ class Python3Parser(PythonParser): rule = ('mkfunc ::= kwargs %sexpr %s' % ('pos_arg ' * args_pos, opname)) self.add_unique_rule(rule, opname, token.attr, customize) + elif opname_base == 'CALL_METHOD': + # PyPy only - DRY with parse2 + args_pos = (token.attr & 0xff) # positional parameters + args_kw = (token.attr >> 8) & 0xff # keyword parameters + # number of apply equiv arguments: + nak = ( len(opname_base)-len('CALL_METHOD') ) // 3 + rule = ('call_function ::= expr ' + + ('pos_arg ' * args_pos) + + ('kwarg ' * args_kw) + + 'expr ' * nak + token.type) + self.add_unique_rule(rule, opname, token.attr, customize) elif opname.startswith('MAKE_CLOSURE'): # DRY with MAKE_FUNCTION # Note: this probably doesn't handle kwargs proprerly @@ -566,6 +632,7 @@ class Python3Parser(PythonParser): class Python32Parser(Python3Parser): + def p_32(self, args): """ # Store locals is only in Python 3.0 to 3.3 @@ -585,95 +652,6 @@ class Python33Parser(Python3Parser): yield_from ::= expr expr YIELD_FROM """ -class Python35onParser(Python3Parser): - def p_35on(self, args): - """ - # Python 3.5+ has WITH_CLEANUP_START/FINISH - - withstmt ::= expr SETUP_WITH exprlist suite_stmts_opt - POP_BLOCK LOAD_CONST COME_FROM - WITH_CLEANUP_START WITH_CLEANUP_FINISH END_FINALLY - - withstmt ::= expr SETUP_WITH POP_TOP suite_stmts_opt - POP_BLOCK LOAD_CONST COME_FROM - WITH_CLEANUP_START WITH_CLEANUP_FINISH END_FINALLY - - withasstmt ::= expr SETUP_WITH designator suite_stmts_opt - POP_BLOCK LOAD_CONST COME_FROM - WITH_CLEANUP_START WITH_CLEANUP_FINISH END_FINALLY - - inplace_op ::= INPLACE_MATRIX_MULTIPLY - binary_op ::= BINARY_MATRIX_MULTIPLY - - # Python 3.5+ does jump optimization - # In <.3.5 the below is a JUMP_FORWARD to a JUMP_ABSOLUTE. - # in return_stmt, we will need the semantic actions in pysource.py - # to work out whether to dedent or not based on the presence of - # RETURN_END_IF vs RETURN_VALUE - - ifelsestmtc ::= testexpr c_stmts_opt JUMP_FORWARD else_suitec - return_stmt ::= ret_expr RETURN_END_IF - - - # Python 3.3+ also has yield from. 
3.5 does it - # differently than 3.3, 3.4 - - expr ::= yield_from - yield_from ::= expr GET_YIELD_FROM_ITER LOAD_CONST YIELD_FROM - - # Python 3.4+ has more loop optimization that removes - # JUMP_FORWARD in some cases, and hence we also don't - # see COME_FROM - _ifstmts_jump ::= c_stmts_opt - - """ - - -class Python36Parser(Python3Parser): - - def p_36(self, args): - """ - - # Python 3.5+ has WITH_CLEANUP_START/FINISH - - withstmt ::= expr SETUP_WITH exprlist suite_stmts_opt - POP_BLOCK LOAD_CONST COME_FROM - WITH_CLEANUP_START WITH_CLEANUP_FINISH END_FINALLY - - withstmt ::= expr SETUP_WITH POP_TOP suite_stmts_opt - POP_BLOCK LOAD_CONST COME_FROM - WITH_CLEANUP_START WITH_CLEANUP_FINISH END_FINALLY - - withasstmt ::= expr SETUP_WITH designator suite_stmts_opt - POP_BLOCK LOAD_CONST COME_FROM - WITH_CLEANUP_START WITH_CLEANUP_FINISH END_FINALLY - - inplace_op ::= INPLACE_MATRIX_MULTIPLY - binary_op ::= BINARY_MATRIX_MULTIPLY - - # Python 3.5+ does jump optimization - # In <.3.5 the below is a JUMP_FORWARD to a JUMP_ABSOLUTE. - # in return_stmt, we will need the semantic actions in pysource.py - # to work out whether to dedent or not based on the presence of - # RETURN_END_IF vs RETURN_VALUE - - ifelsestmtc ::= testexpr c_stmts_opt JUMP_FORWARD else_suitec - return_stmt ::= ret_expr RETURN_END_IF - - - # Python 3.3+ also has yield from. 3.5 does it - # differently than 3.3, 3.4 - - expr ::= yield_from - yield_from ::= expr GET_YIELD_FROM_ITER LOAD_CONST YIELD_FROM - - # Python 3.4+ has more loop optimization that removes - # JUMP_FORWARD in some cases, and hence we also don't - # see COME_FROM - _ifstmts_jump ::= c_stmts_opt - """ - - class Python3ParserSingle(Python3Parser, PythonParserSingle): pass @@ -685,21 +663,15 @@ class Python32ParserSingle(Python32Parser, PythonParserSingle): class Python33ParserSingle(Python33Parser, PythonParserSingle): pass -class Python35onParserSingle(Python35onParser, PythonParserSingle): - pass - -class Python36ParserSingle(Python36Parser, PythonParserSingle): - pass - def info(args): # Check grammar # Should also add a way to dump grammar - import sys p = Python3Parser() if len(args) > 0: arg = args[0] if arg == '3.5': - p = Python35onParser() + from uncompyle6.parser.parse35 import Python35Parser + p = Python35Parser() elif arg == '3.3': p = Python33Parser() elif arg == '3.2': diff --git a/uncompyle6/parsers/parse34.py b/uncompyle6/parsers/parse34.py index 280d2bd4..1ef1f7b8 100644 --- a/uncompyle6/parsers/parse34.py +++ b/uncompyle6/parsers/parse34.py @@ -46,13 +46,24 @@ class Python34ParserSingle(Python34Parser, PythonParserSingle): pass -def info(args): +if __name__ == '__main__': # Check grammar - # Should also add a way to dump grammar p = Python34Parser() p.checkGrammar() - - -if __name__ == '__main__': - import sys - info(sys.argv) + from uncompyle6 import PYTHON_VERSION, IS_PYPY + if PYTHON_VERSION == 3.4: + lhs, rhs, tokens, right_recursive = p.checkSets() + from uncompyle6.scanner import get_scanner + s = get_scanner(PYTHON_VERSION, IS_PYPY) + opcode_set = set(s.opc.opname).union(set( + """JUMP_BACK CONTINUE RETURN_END_IF COME_FROM + LOAD_GENEXPR LOAD_ASSERT LOAD_SETCOMP LOAD_DICTCOMP LOAD_CLASSNAME + LAMBDA_MARKER RETURN_LAST + """.split())) + remain_tokens = set(tokens) - opcode_set + import re + remain_tokens = set([re.sub('_\d+$','', t) for t in remain_tokens]) + remain_tokens = set([re.sub('_CONT$','', t) for t in remain_tokens]) + remain_tokens = set(remain_tokens) - opcode_set + print(remain_tokens) + # print(sorted(p.rule2name.items())) 
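Aside: the remaining-token check in the __main__ block just above is repeated nearly verbatim for parse26.py, parse27.py, parse35.py, and parse36.py elsewhere in this patch. Below is a minimal sketch of how that duplication could be factored out, assuming only the checkSets(), get_scanner(), and opc.opname interfaces those blocks already use; the helper name dump_remaining_tokens and its placement are hypothetical and not part of the patch itself:

    import re

    from uncompyle6.scanner import get_scanner

    def dump_remaining_tokens(parser, version, is_pypy, extra_tokens):
        """Print grammar tokens that no scanner opcode or pseudo-token covers."""
        lhs, rhs, tokens, right_recursive = parser.checkSets()
        scanner = get_scanner(version, is_pypy)
        opcode_set = set(scanner.opc.opname).union(set(extra_tokens))
        remain_tokens = set(tokens) - opcode_set
        # Strip customization suffixes such as CALL_FUNCTION_1 or IMPORT_NAME_CONT
        # so customized tokens map back to their base opcode names.
        remain_tokens = set(re.sub(r'_\d+$', '', t) for t in remain_tokens)
        remain_tokens = set(re.sub(r'_CONT$', '', t) for t in remain_tokens)
        print(remain_tokens - opcode_set)

Each per-version __main__ block would then reduce to constructing its parser, calling checkGrammar(), and passing the parser plus its pseudo-token names (JUMP_BACK, COME_FROM, LOAD_ASSERT, RETURN_LAST, and so on) to the helper.
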
diff --git a/uncompyle6/parsers/parse35.py b/uncompyle6/parsers/parse35.py new file mode 100644 index 00000000..66407ca2 --- /dev/null +++ b/uncompyle6/parsers/parse35.py @@ -0,0 +1,78 @@ +# Copyright (c) 2016 Rocky Bernstein +""" +spark grammar differences over Python3 for Python 3.5. +""" +from __future__ import print_function + +from uncompyle6.parser import PythonParserSingle +from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG +from uncompyle6.parsers.parse3 import Python3Parser + +class Python35Parser(Python3Parser): + + def __init__(self, debug_parser=PARSER_DEFAULT_DEBUG): + super(Python35Parser, self).__init__(debug_parser) + self.customized = {} + + def p_35on(self, args): + """ + # Python 3.5+ has WITH_CLEANUP_START/FINISH + + withstmt ::= expr SETUP_WITH exprlist suite_stmts_opt + POP_BLOCK LOAD_CONST COME_FROM + WITH_CLEANUP_START WITH_CLEANUP_FINISH END_FINALLY + + withstmt ::= expr SETUP_WITH POP_TOP suite_stmts_opt + POP_BLOCK LOAD_CONST COME_FROM + WITH_CLEANUP_START WITH_CLEANUP_FINISH END_FINALLY + + withasstmt ::= expr SETUP_WITH designator suite_stmts_opt + POP_BLOCK LOAD_CONST COME_FROM + WITH_CLEANUP_START WITH_CLEANUP_FINISH END_FINALLY + + inplace_op ::= INPLACE_MATRIX_MULTIPLY + binary_op ::= BINARY_MATRIX_MULTIPLY + + # Python 3.5+ does jump optimization + # In <.3.5 the below is a JUMP_FORWARD to a JUMP_ABSOLUTE. + # in return_stmt, we will need the semantic actions in pysource.py + # to work out whether to dedent or not based on the presence of + # RETURN_END_IF vs RETURN_VALUE + + ifelsestmtc ::= testexpr c_stmts_opt JUMP_FORWARD else_suitec + + # Python 3.3+ also has yield from. 3.5 does it + # differently than 3.3, 3.4 + + expr ::= yield_from + yield_from ::= expr GET_YIELD_FROM_ITER LOAD_CONST YIELD_FROM + + # Python 3.4+ has more loop optimization that removes + # JUMP_FORWARD in some cases, and hence we also don't + # see COME_FROM + _ifstmts_jump ::= c_stmts_opt + """ +class Python35ParserSingle(Python35Parser, PythonParserSingle): + pass + +if __name__ == '__main__': + # Check grammar + p = Python35Parser() + p.checkGrammar() + from uncompyle6 import PYTHON_VERSION, IS_PYPY + if PYTHON_VERSION == 3.5: + lhs, rhs, tokens, right_recursive = p.checkSets() + from uncompyle6.scanner import get_scanner + s = get_scanner(PYTHON_VERSION, IS_PYPY) + opcode_set = set(s.opc.opname).union(set( + """JUMP_BACK CONTINUE RETURN_END_IF COME_FROM + LOAD_GENEXPR LOAD_ASSERT LOAD_SETCOMP LOAD_DICTCOMP LOAD_CLASSNAME + LAMBDA_MARKER RETURN_LAST + """.split())) + remain_tokens = set(tokens) - opcode_set + import re + remain_tokens = set([re.sub('_\d+$','', t) for t in remain_tokens]) + remain_tokens = set([re.sub('_CONT$','', t) for t in remain_tokens]) + remain_tokens = set(remain_tokens) - opcode_set + print(remain_tokens) + # print(sorted(p.rule2name.items())) diff --git a/uncompyle6/parsers/parse36.py b/uncompyle6/parsers/parse36.py new file mode 100644 index 00000000..936fabd4 --- /dev/null +++ b/uncompyle6/parsers/parse36.py @@ -0,0 +1,52 @@ +# Copyright (c) 2016 Rocky Bernstein +""" +spark grammar differences over Python 3.5 for Python 3.6. 
+""" +from __future__ import print_function + +from uncompyle6.parser import PythonParserSingle +from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG +from uncompyle6.parsers.parse35 import Python35Parser + +class Python36Parser(Python35Parser): + + def __init__(self, debug_parser=PARSER_DEFAULT_DEBUG): + super(Python36Parser, self).__init__(debug_parser) + self.customized = {} + + def p_36misc(self, args): + """ + formatted_value ::= LOAD_FAST FORMAT_VALUE + str ::= LOAD_CONST + joined_str ::= LOAD_CONST LOAD_ATTR format_value_or_strs + BUILD_LIST CALL_FUNCTION + format_value_or_strs ::= format_value_or_strs format_value_or_str + format_value_or_strs ::= format_value_or_str + format_value_or_str ::= format_value + format_value_or_str ::= str + """ + +class Python36ParserSingle(Python36Parser, PythonParserSingle): + pass + +if __name__ == '__main__': + # Check grammar + p = Python36Parser() + p.checkGrammar() + from uncompyle6 import PYTHON_VERSION, IS_PYPY + if PYTHON_VERSION == 3.6: + lhs, rhs, tokens, right_recursive = p.checkSets() + from uncompyle6.scanner import get_scanner + s = get_scanner(PYTHON_VERSION, IS_PYPY) + opcode_set = set(s.opc.opname).union(set( + """JUMP_BACK CONTINUE RETURN_END_IF COME_FROM + LOAD_GENEXPR LOAD_ASSERT LOAD_SETCOMP LOAD_DICTCOMP LOAD_CLASSNAME + LAMBDA_MARKER RETURN_LAST + """.split())) + remain_tokens = set(tokens) - opcode_set + import re + remain_tokens = set([re.sub('_\d+$','', t) for t in remain_tokens]) + remain_tokens = set([re.sub('_CONT$','', t) for t in remain_tokens]) + remain_tokens = set(remain_tokens) - opcode_set + print(remain_tokens) + # print(sorted(p.rule2name.items())) diff --git a/uncompyle6/scanner.py b/uncompyle6/scanner.py index 23490e90..1a0d8758 100755 --- a/uncompyle6/scanner.py +++ b/uncompyle6/scanner.py @@ -18,7 +18,7 @@ from __future__ import print_function import sys -from uncompyle6 import PYTHON3 +from uncompyle6 import PYTHON3, IS_PYPY from uncompyle6.scanners.tok import Token # The byte code versions we support @@ -51,9 +51,10 @@ class Scanner(object): def __init__(self, version, show_asm=None, is_pypy=False): self.version = version self.show_asm = show_asm + self.is_pypy = is_pypy if version in PYTHON_VERSIONS: - if is_pypy and version != 3.2: + if is_pypy: v_str = "opcode_pypy%s" % (int(version * 10)) else: v_str = "opcode_%s" % (int(version * 10)) @@ -250,11 +251,13 @@ class Scanner(object): self.Token = tokenClass return self.Token +def op_has_argument(op, opc): + return op >= opc.HAVE_ARGUMENT def parse_fn_counts(argc): return ((argc & 0xFF), (argc >> 8) & 0xFF, (argc >> 16) & 0x7FFF) -def get_scanner(version, show_asm=None, is_pypy=False): +def get_scanner(version, is_pypy=False, show_asm=None): # Pick up appropriate scanner if version in PYTHON_VERSIONS: v_str = "%s" % (int(version * 10)) @@ -281,5 +284,5 @@ def get_scanner(version, show_asm=None, is_pypy=False): if __name__ == "__main__": import inspect, uncompyle6 co = inspect.currentframe().f_code - scanner = get_scanner(uncompyle6.PYTHON_VERSION, True) + scanner = get_scanner(uncompyle6.PYTHON_VERSION, IS_PYPY, True) tokens, customize = scanner.disassemble(co, {}) diff --git a/uncompyle6/scanners/pypy27.py b/uncompyle6/scanners/pypy27.py new file mode 100644 index 00000000..4ecfd628 --- /dev/null +++ b/uncompyle6/scanners/pypy27.py @@ -0,0 +1,26 @@ +# Copyright (c) 2016 by Rocky Bernstein +""" +Python PyPy 2.7 bytecode scanner/deparser + +This overlaps Python's 2.7's dis module, but it can be run from +Python 3 and other versions of Python. 
Also, we save token +information for later use in deparsing. +""" + +import uncompyle6.scanners.scanner27 as scan + +# bytecode verification, verify(), uses JUMP_OPs from here +from xdis.opcodes import opcode_pypy27 +JUMP_OPs = opcode_pypy27.JUMP_OPs + +# We base this off of 2.6 instead of the other way around +# because we cleaned things up this way. +# The history is that 2.7 support is the cleanest, +# then from that we got 2.6 and so on. +class ScannerPyPy27(scan.Scanner27): + def __init__(self, show_asm): + # There are no differences in initialization between + # pypy 2.7 and 2.7 + scan.Scanner27.__init__(self, show_asm, is_pypy=True) + self.version = 2.7 + return diff --git a/uncompyle6/scanners/pypy32.py b/uncompyle6/scanners/pypy32.py new file mode 100644 index 00000000..264708ef --- /dev/null +++ b/uncompyle6/scanners/pypy32.py @@ -0,0 +1,26 @@ +# Copyright (c) 2016 by Rocky Bernstein +""" +Python PyPy 3.2 bytecode scanner/deparser + +This overlaps Python's 3.2's dis module, but it can be run from +Python 3 and other versions of Python. Also, we save token +information for later use in deparsing. +""" + +import uncompyle6.scanners.scanner32 as scan + +# bytecode verification, verify(), uses JUMP_OPs from here +from xdis.opcodes import opcode_32 as opc # is this rgith? +JUMP_OPs = map(lambda op: opc.opname[op], opc.hasjrel + opc.hasjabs) + +# We base this off of 2.6 instead of the other way around +# because we cleaned things up this way. +# The history is that 2.7 support is the cleanest, +# then from that we got 2.6 and so on. +class ScannerPyPy32(scan.Scanner32): + def __init__(self, show_asm): + # There are no differences in initialization between + # pypy 3.2 and 3.2 + scan.Scanner32.__init__(self, show_asm, is_pypy=True) + self.version = 3.2 + return diff --git a/uncompyle6/scanners/scanner2.py b/uncompyle6/scanners/scanner2.py index 53752522..0d7c7645 100755 --- a/uncompyle6/scanners/scanner2.py +++ b/uncompyle6/scanners/scanner2.py @@ -41,14 +41,18 @@ class Scanner2(scan.Scanner): def disassemble(self, co, classname=None, code_objects={}, show_asm=None): """ - Disassemble a Python 2 code object, returning a list of 'Token'. - Various tranformations are made to assist the deparsing grammar. - For example: + Pick out tokens from an uncompyle6 code object, and transform them, + returning a list of uncompyle6 'Token's. + + The tranformations are made to assist the deparsing grammar. + Specificially: - various types of LOAD_CONST's are categorized in terms of what they load - COME_FROM instructions are added to assist parsing control structures - - MAKE_FUNCTION and FUNCTION_CALLS append the number of positional aruments - The main part of this procedure is modelled after - dis.disassemble(). + - MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments + + Also, when we encounter certain tokens, we add them to a set which will cause custom + grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST + cause specific rules for the specific number of arguments they take. """ show_asm = self.show_asm if not show_asm else show_asm @@ -63,6 +67,9 @@ class Scanner2(scan.Scanner): tokens = [] customize = {} + if self.is_pypy: + customize['PyPy'] = 1; + Token = self.Token # shortcut n = self.setup_code(co) @@ -93,10 +100,21 @@ class Scanner2(scan.Scanner): # 'LOAD_ASSERT' is used in assert statements. 
self.load_asserts = set() for i in self.op_range(0, n): - # We need to detect the difference between - # "raise AssertionError" and - # "assert" - if self.code[i] == self.opc.PJIT and self.code[i+3] == self.opc.LOAD_GLOBAL: + # We need to detect the difference between: + # raise AssertionError + # and + # assert ... + # Below we use the heuristic that it is preceded by a POP_JUMP. + # however we could also use followed by RAISE_VARARGS + # or for PyPy there may be a JUMP_IF_NOT_DEBUG before. + # FIXME: remove uses of PJIF, and PJIT + if self.is_pypy: + have_pop_jump = self.code[i] in (self.opc.PJIF, + self.opc.PJIT) + else: + have_pop_jump = self.code[i] == self.opc.PJIT + + if have_pop_jump and self.code[i+3] == self.opc.LOAD_GLOBAL: if names[self.get_argument(i+3)] == 'AssertionError': self.load_asserts.add(i+3) @@ -185,12 +203,34 @@ class Scanner2(scan.Scanner): self.code[self.prev[offset]] == self.opc.LOAD_CLOSURE: continue else: - opname = '%s_%d' % (opname, oparg) + if self.is_pypy and not oparg and opname == 'BUILD_MAP': + opname = 'BUILD_MAP_n' + else: + opname = '%s_%d' % (opname, oparg) if op != self.opc.BUILD_SLICE: customize[opname] = oparg + elif self.is_pypy and opname in ('LOOKUP_METHOD', + 'JUMP_IF_NOT_DEBUG', + 'SETUP_EXCEPT', + 'SETUP_FINALLY'): + # The value in the dict is in special cases in semantic actions, such + # as CALL_FUNCTION. The value is not used in these cases, so we put + # in arbitrary value 0. + customize[opname] = 0 elif op == self.opc.JUMP_ABSOLUTE: + # Further classify JUMP_ABSOLUTE into backward jumps + # which are used in loops, and "CONTINUE" jumps which + # may appear in a "continue" statement. The loop-type + # and continue-type jumps will help us classify loop + # boundaries The continue-type jumps help us get + # "continue" statements with would otherwise be turned + # into a "pass" statement because JUMPs are sometimes + # ignored in rules as just boundary overhead. In + # comprehensions we might sometimes classify JUMP_BACK + # as CONTINUE, but that's okay since we add a grammar + # rule for that. target = self.get_target(offset) - if target < offset: + if target <= offset: if (offset in self.stmts and self.code[offset+3] not in (self.opc.END_FINALLY, self.opc.POP_BLOCK) @@ -213,16 +253,18 @@ class Scanner2(scan.Scanner): if offset not in replace: tokens.append(Token( - opname, oparg, pattr, offset, linestart, op, has_arg)) + opname, oparg, pattr, offset, linestart, op, + has_arg, self.opc)) else: tokens.append(Token( - replace[offset], oparg, pattr, offset, linestart, op, has_arg)) + replace[offset], oparg, pattr, offset, linestart, + op, has_arg, self.opc)) pass pass if show_asm in ('both', 'after'): for t in tokens: - print(t.format()) + print(t) print() return tokens, customize @@ -834,7 +876,7 @@ if __name__ == "__main__": from uncompyle6 import PYTHON_VERSION tokens, customize = Scanner2(PYTHON_VERSION).disassemble(co) for t in tokens: - print(t.format()) + print(t) else: print("Need to be Python 3.2 or greater to demo; I am %s." 
% PYTHON_VERSION) diff --git a/uncompyle6/scanners/scanner26.py b/uncompyle6/scanners/scanner26.py index 2dca3683..cb05beb9 100755 --- a/uncompyle6/scanners/scanner26.py +++ b/uncompyle6/scanners/scanner26.py @@ -284,10 +284,12 @@ class Scanner26(scan.Scanner2): if offset not in replace: tokens.append(Token( - op_name, oparg, pattr, offset, linestart, op, has_arg)) + op_name, oparg, pattr, offset, linestart, op, + has_arg, self.opc)) else: tokens.append(Token( - replace[offset], oparg, pattr, offset, linestart, op, has_arg)) + replace[offset], oparg, pattr, offset, linestart, op, + has_arg, self.opc)) pass pass diff --git a/uncompyle6/scanners/scanner27.py b/uncompyle6/scanners/scanner27.py index af1320b4..15ee3d73 100755 --- a/uncompyle6/scanners/scanner27.py +++ b/uncompyle6/scanners/scanner27.py @@ -52,7 +52,7 @@ class Scanner27(Scanner2): # opcodes with expect a variable number pushed values whose # count is in the opcode. For parsing we generally change the # opcode name to include that number. - self.varargs_ops = frozenset([ + varargs_ops = set([ self.opc.BUILD_LIST, self.opc.BUILD_TUPLE, self.opc.BUILD_SLICE, self.opc.UNPACK_SEQUENCE, self.opc.MAKE_FUNCTION, self.opc.CALL_FUNCTION, @@ -62,6 +62,10 @@ class Scanner27(Scanner2): # New in Python 2.7 self.opc.BUILD_SET, self.opc.BUILD_MAP]) + if is_pypy: + varargs_ops.add(self.opc.CALL_METHOD) + self.varargs_ops = frozenset(varargs_ops) + # "setup" opcodes self.setup_ops = frozenset([ self.opc.SETUP_EXCEPT, self.opc.SETUP_FINALLY, @@ -104,7 +108,7 @@ if __name__ == "__main__": co = inspect.currentframe().f_code tokens, customize = Scanner27().disassemble(co) for t in tokens: - print(t.format()) + print(t) pass else: print("Need to be Python 2.7 to demo; I am %s." % diff --git a/uncompyle6/scanners/scanner3.py b/uncompyle6/scanners/scanner3.py index 8768962a..d0034afb 100644 --- a/uncompyle6/scanners/scanner3.py +++ b/uncompyle6/scanners/scanner3.py @@ -25,6 +25,7 @@ from __future__ import print_function from collections import namedtuple from array import array +from uncompyle6.scanner import Scanner, op_has_argument from xdis.code import iscode from xdis.bytecode import Bytecode from uncompyle6.scanner import Token, parse_fn_counts @@ -42,9 +43,7 @@ globals().update(op3.opmap) # POP_JUMP_IF is used by verify POP_JUMP_TF = (POP_JUMP_IF_TRUE, POP_JUMP_IF_FALSE) -import uncompyle6.scanner as scan - -class Scanner3(scan.Scanner): +class Scanner3(Scanner): def __init__(self, version, show_asm=None, is_pypy=False): super(Scanner3, self).__init__(version, show_asm, is_pypy) @@ -89,13 +88,15 @@ class Scanner3(scan.Scanner): # Opcodes that take a variable number of arguments # (expr's) - self.varargs = frozenset([self.opc.BUILD_LIST, - self.opc.BUILD_TUPLE, - self.opc.BUILD_SET, - self.opc.BUILD_SLICE, - self.opc.BUILD_MAP, - self.opc.UNPACK_SEQUENCE, - self.opc.RAISE_VARARGS]) + varargs_ops = set([ + self.opc.BUILD_LIST, self.opc.BUILD_TUPLE, + self.opc.BUILD_SET, self.opc.BUILD_SLICE, + self.opc.BUILD_MAP, self.opc.UNPACK_SEQUENCE, + self.opc.RAISE_VARARGS]) + + if is_pypy: + varargs_ops.add(self.opc.CALL_METHOD) + self.varargs_ops = frozenset(varargs_ops) # Not really a set, but still clasification-like self.statement_opcode_sequences = [ @@ -107,14 +108,18 @@ class Scanner3(scan.Scanner): def disassemble(self, co, classname=None, code_objects={}, show_asm=None): """ - Disassemble a Python 3 code object, returning a list of 'Token'. - Various tranformations are made to assist the deparsing grammar. 
- For example: + Pick out tokens from an uncompyle6 code object, and transform them, + returning a list of uncompyle6 'Token's. + + The tranformations are made to assist the deparsing grammar. + Specificially: - various types of LOAD_CONST's are categorized in terms of what they load - COME_FROM instructions are added to assist parsing control structures - - MAKE_FUNCTION and FUNCTION_CALLS append the number of positional aruments - The main part of this procedure is modelled after - dis.disassemble(). + - MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments + + Also, when we encounter certain tokens, we add them to a set which will cause custom + grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST + cause specific rules for the specific number of arguments they take. """ show_asm = self.show_asm if not show_asm else show_asm @@ -127,6 +132,10 @@ class Scanner3(scan.Scanner): # Container for tokens tokens = [] + customize = {} + if self.is_pypy: + customize['PyPy'] = 1; + self.code = array('B', co.co_code) self.build_lines_data(co) self.build_prev_op() @@ -174,7 +183,7 @@ class Scanner3(scan.Scanner): for jump_offset in jump_targets[inst.offset]: tokens.append(Token('COME_FROM', None, repr(jump_offset), offset='%s_%s' % (inst.offset, jump_idx), - has_arg = True)) + has_arg = True, opc=self.opc)) jump_idx += 1 pass pass @@ -224,13 +233,22 @@ class Scanner3(scan.Scanner): offset = inst.offset, linestart = inst.starts_line, op = op, - has_arg = (op >= op3.HAVE_ARGUMENT) + has_arg = op_has_argument(op, op3), + opc = self.opc ) ) continue - elif op in self.varargs: + elif op in self.varargs_ops: pos_args = inst.argval - opname = '%s_%d' % (opname, pos_args) + if self.is_pypy and not pos_args and opname == 'BUILD_MAP': + opname = 'BUILD_MAP_n' + else: + opname = '%s_%d' % (opname, pos_args) + elif self.is_pypy and opname in ('CALL_METHOD', 'JUMP_IF_NOT_DEBUG'): + # The value in the dict is in special cases in semantic actions, such + # as CALL_FUNCTION. The value is not used in these cases, so we put + # in arbitrary value 0. + customize[opname] = 0 elif opname == 'UNPACK_EX': # FIXME: try with scanner and parser by # changing inst.argval @@ -240,7 +258,7 @@ class Scanner3(scan.Scanner): argval = (before_args, after_args) opname = '%s_%d+%d' % (opname, before_args, after_args) elif op == self.opc.JUMP_ABSOLUTE: - # Further classifhy JUMP_ABSOLUTE into backward jumps + # Further classify JUMP_ABSOLUTE into backward jumps # which are used in loops, and "CONTINUE" jumps which # may appear in a "continue" statement. 
The loop-type # and continue-type jumps will help us classify loop @@ -283,16 +301,17 @@ class Scanner3(scan.Scanner): offset = inst.offset, linestart = inst.starts_line, op = op, - has_arg = (op >= op3.HAVE_ARGUMENT) + has_arg = (op >= op3.HAVE_ARGUMENT), + opc = self.opc ) ) pass if show_asm in ('both', 'after'): for t in tokens: - print(t.format()) + print(t) print() - return tokens, {} + return tokens, customize def build_lines_data(self, code_obj): """ @@ -383,7 +402,7 @@ class Scanner3(scan.Scanner): # Determine structures and fix jumps in Python versions # since 2.3 - self.detect_structure(offset) + self.detect_structure(offset, targets) has_arg = (op >= op3.HAVE_ARGUMENT) if has_arg: @@ -497,7 +516,7 @@ class Scanner3(scan.Scanner): target += offset + 3 return target - def detect_structure(self, offset): + def detect_structure(self, offset, targets): """ Detect structures and their boundaries to fix optimized jumps in python2.3+ @@ -715,6 +734,33 @@ class Scanner3(scan.Scanner): self.structs.append({'type': 'if-then', 'start': start, 'end': rtarget}) + # It is important to distingish if this return is inside some sort + # except block return + jump_prev = prev_op[offset] + if self.is_pypy and code[jump_prev] == self.opc.COMPARE_OP: + if self.opc.cmp_op[code[jump_prev+1]] == 'exception match': + return + if self.version >= 3.5: + # Python 3.5 may remove as dead code a JUMP + # instruction after a RETURN_VALUE. So we check + # based on seeing SETUP_EXCEPT various places. + if code[rtarget] == self.opc.SETUP_EXCEPT: + return + # Check that next instruction after pops and jump is + # not from SETUP_EXCEPT + next_op = rtarget + if code[next_op] == self.opc.POP_BLOCK: + next_op += self.op_size(self.code[next_op]) + if code[next_op] == self.opc.JUMP_ABSOLUTE: + next_op += self.op_size(self.code[next_op]) + if next_op in targets: + for try_op in targets[next_op]: + come_from_op = code[try_op] + if come_from_op == self.opc.SETUP_EXCEPT: + return + pass + pass + pass self.return_end_ifs.add(prev_op[rtarget]) elif op in self.jump_if_pop: @@ -784,7 +830,7 @@ if __name__ == "__main__": from uncompyle6 import PYTHON_VERSION tokens, customize = Scanner3(PYTHON_VERSION).disassemble(co) for t in tokens: - print(t.format()) + print(t) else: print("Need to be Python 3.2 or greater to demo; I am %s." % PYTHON_VERSION) diff --git a/uncompyle6/scanners/scanner35.py b/uncompyle6/scanners/scanner35.py index d1349201..715230ff 100644 --- a/uncompyle6/scanners/scanner35.py +++ b/uncompyle6/scanners/scanner35.py @@ -28,7 +28,7 @@ if __name__ == "__main__": co = inspect.currentframe().f_code tokens, customize = Scanner35().disassemble(co) for t in tokens: - print(t.format()) + print(t) pass else: print("Need to be Python 3.5 to demo; I am %s." 
% diff --git a/uncompyle6/scanners/tok.py b/uncompyle6/scanners/tok.py index 13615468..f9879334 100644 --- a/uncompyle6/scanners/tok.py +++ b/uncompyle6/scanners/tok.py @@ -2,7 +2,7 @@ # Copyright (c) 2000-2002 by hartmut Goebel # Copyright (c) 1999 John Aycock -import sys +import re, sys from uncompyle6 import PYTHON3 if PYTHON3: @@ -21,7 +21,7 @@ class Token: # attr = argval # pattr = argrepr def __init__(self, type_, attr=None, pattr=None, offset=-1, - linestart=None, op=None, has_arg=None): + linestart=None, op=None, has_arg=None, opc=None): self.type = intern(type_) self.op = op self.has_arg = has_arg @@ -29,6 +29,10 @@ class Token: self.pattr = pattr self.offset = offset self.linestart = linestart + if has_arg == False: + self.attr = None + self.pattr = None + self.opc = opc def __eq__(self, o): """ '==', but it's okay if offsets and linestarts are different""" @@ -42,20 +46,38 @@ class Token: def __repr__(self): return str(self.type) - def __str__(self): - pattr = self.pattr if self.pattr is not None else '' - prefix = '\n%3d ' % self.linestart if self.linestart else (' ' * 6) - return (prefix + - ('%9s %-18s %r' % (self.offset, self.type, pattr))) + # def __str__(self): + # pattr = self.pattr if self.pattr is not None else '' + # prefix = '\n%3d ' % self.linestart if self.linestart else (' ' * 6) + # return (prefix + + # ('%9s %-18s %r' % (self.offset, self.type, pattr))) - def format(self): - prefix = '\n%3d ' % self.linestart if self.linestart else (' ' * 6) - offset_opname = '%9s %-18s' % (self.offset, self.type) - argstr = "%6d " % self.attr if isinstance(self.attr, int) else (' '*7) - if self.has_arg: - return "%s%s%s %r" % (prefix, offset_opname, argstr, self.pattr) - else: + def __str__(self): + prefix = '\n%4d ' % self.linestart if self.linestart else (' ' * 6) + offset_opname = '%6s %-17s' % (self.offset, self.type) + if not self.has_arg: return "%s%s" % (prefix, offset_opname) + argstr = "%6d " % self.attr if isinstance(self.attr, int) else (' '*7) + if self.pattr: + pattr = self.pattr + if self.opc: + if self.op in self.opc.hasjrel: + pattr = "to " + self.pattr + elif self.op in self.opc.hasjabs: + self.pattr= str(self.pattr) + if not self.pattr.startswith('to '): + pattr = "to " + str(self.pattr) + pass + elif self.op in self.opc.hascompare: + if isinstance(self.attr, int): + pattr = self.opc.cmp_op[self.attr] + # And so on. 
See xdis/bytecode.py get_instructions_bytes + pass + elif re.search('_\d+$', self.type): + return "%s%s%s" % (prefix, offset_opname, argstr) + else: + pattr = '' + return "%s%s%s %r" % (prefix, offset_opname, argstr, pattr) def __hash__(self): return hash(self.type) diff --git a/uncompyle6/semantics/aligner.py b/uncompyle6/semantics/aligner.py new file mode 100644 index 00000000..eb154062 --- /dev/null +++ b/uncompyle6/semantics/aligner.py @@ -0,0 +1,147 @@ +import sys +from uncompyle6.semantics.pysource import ( + SourceWalker, SourceWalkerError, find_globals, ASSIGN_DOC_STRING, RETURN_NONE) +from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG +class AligningWalker(SourceWalker, object): + def __init__(self, version, scanner, out, showast=False, + debug_parser=PARSER_DEFAULT_DEBUG, + compile_mode='exec', is_pypy=False): + SourceWalker.__init__(self, version, out, scanner, showast, debug_parser, + compile_mode, is_pypy) + self.desired_line_number = 0 + self.current_line_number = 0 + + def println(self, *data): + if data and not(len(data) == 1 and data[0] ==''): + self.write(*data) + + self.pending_newlines = max(self.pending_newlines, 1) + + def write(self, *data): + from trepan.api import debug; debug() + if (len(data) == 1) and data[0] == self.indent: + diff = max(self.pending_newlines, + self.desired_line_number - self.current_line_number) + self.f.write('\n'*diff) + self.current_line_number += diff + self.pending_newlines = 0 + if (len(data) == 0) or (len(data) == 1 and data[0] == ''): + return + + out = ''.join((str(j) for j in data)) + n = 0 + for i in out: + if i == '\n': + n += 1 + if n == len(out): + self.pending_newlines = max(self.pending_newlines, n) + return + elif n: + self.pending_newlines = max(self.pending_newlines, n) + out = out[n:] + break + else: + break + + if self.pending_newlines > 0: + diff = max(self.pending_newlines, + self.desired_line_number - self.current_line_number) + self.f.write('\n'*diff) + self.current_line_number += diff + self.pending_newlines = 0 + + for i in out[::-1]: + if i == '\n': + self.pending_newlines += 1 + else: + break + + if self.pending_newlines: + out = out[:-self.pending_newlines] + self.f.write(out) + + def default(self, node): + mapping = self._get_mapping(node) + if hasattr(node, 'linestart'): + if node.linestart: + self.desired_line_number = node.linestart + table = mapping[0] + key = node + + for i in mapping[1:]: + key = key[i] + pass + + if key.type in table: + self.engine(table[key.type], node) + self.prune() + +from xdis.code import iscode +from uncompyle6.scanner import get_scanner +from uncompyle6.show import ( + maybe_show_asm, +) + +def align_deparse_code(version, co, out=sys.stderr, showasm=False, showast=False, + showgrammar=False, code_objects={}, compile_mode='exec', is_pypy=False): + """ + disassembles and deparses a given code block 'co' + """ + + assert iscode(co) + # store final output stream for case of error + scanner = get_scanner(version, is_pypy=is_pypy) + + tokens, customize = scanner.disassemble(co, code_objects=code_objects) + maybe_show_asm(showasm, tokens) + + debug_parser = dict(PARSER_DEFAULT_DEBUG) + if showgrammar: + debug_parser['reduce'] = showgrammar + debug_parser['errorstack'] = True + + # Build AST from disassembly. 
+ deparsed = AligningWalker(version, scanner, out, showast=showast, + debug_parser=debug_parser, compile_mode=compile_mode, + is_pypy = is_pypy) + + isTopLevel = co.co_name == '' + deparsed.ast = deparsed.build_ast(tokens, customize, isTopLevel=isTopLevel) + + assert deparsed.ast == 'stmts', 'Should have parsed grammar start' + + del tokens # save memory + + deparsed.mod_globs = find_globals(deparsed.ast, set()) + + # convert leading '__doc__ = "..." into doc string + try: + if deparsed.ast[0][0] == ASSIGN_DOC_STRING(co.co_consts[0]): + deparsed.print_docstring('', co.co_consts[0]) + del deparsed.ast[0] + if deparsed.ast[-1] == RETURN_NONE: + deparsed.ast.pop() # remove last node + # todo: if empty, add 'pass' + except: + pass + + # What we've been waiting for: Generate source from AST! + deparsed.gen_source(deparsed.ast, co.co_name, customize) + + for g in deparsed.mod_globs: + deparsed.write('# global %s ## Warning: Unused global' % g) + + if deparsed.ERROR: + raise SourceWalkerError("Deparsing stopped due to parse error") + return deparsed + +if __name__ == '__main__': + def deparse_test(co): + "This is a docstring" + sys_version = sys.version_info.major + (sys.version_info.minor / 10.0) + # deparsed = deparse_code(sys_version, co, showasm=True, showast=True) + deparsed = align_deparse_code(sys_version, co, showasm=False, showast=False, + showgrammar=False) + print(deparsed.text) + return + deparse_test(deparse_test.__code__) diff --git a/uncompyle6/semantics/fragments.py b/uncompyle6/semantics/fragments.py index 3a6042b5..5ca91745 100644 --- a/uncompyle6/semantics/fragments.py +++ b/uncompyle6/semantics/fragments.py @@ -34,7 +34,7 @@ For example in: The node will be associated with the text break, excluding the trailing newline. -Note we assocate the accumulated text with the node normally, but we just don't +Note we associate the accumulated text with the node normally, but we just don't do it recursively which is where offsets are probably located. 2. 
%b @@ -55,10 +55,9 @@ from __future__ import print_function import re, sys -from uncompyle6 import PYTHON3 +from uncompyle6 import PYTHON3, IS_PYPY from xdis.code import iscode from uncompyle6.semantics import pysource -from uncompyle6.parser import get_python_parser from uncompyle6 import parser from uncompyle6.scanner import Token, Code, get_scanner from uncompyle6.show import ( @@ -78,7 +77,7 @@ else: from StringIO import StringIO -from spark_parser import GenericASTTraversal, GenericASTTraversalPruningException, \ +from spark_parser import GenericASTTraversalPruningException, \ DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG from collections import namedtuple @@ -110,37 +109,19 @@ TABLE_DIRECT_FRAGMENT = { } -MAP_DIRECT_FRAGMENT = dict(TABLE_DIRECT, **TABLE_DIRECT_FRAGMENT), - - class FragmentsWalker(pysource.SourceWalker, object): + MAP_DIRECT_FRAGMENT = () + stacked_params = ('f', 'indent', 'isLambda', '_globals') def __init__(self, version, scanner, showast=False, debug_parser=PARSER_DEFAULT_DEBUG, compile_mode='exec', is_pypy=False): - GenericASTTraversal.__init__(self, ast=None) - self.scanner = scanner - params = { - 'f': StringIO(), - 'indent': '', - } - self.version = version - self.p = get_python_parser( - version, dict(debug_parser), - compile_mode=compile_mode, is_pypy=is_pypy - ) - self.showast = showast - self.params = params - self.param_stack = [] - self.ERROR = None - self.prec = 100 - self.return_none = False - self.mod_globs = set() - self.currentclass = None - self.classes = [] - self.pending_newlines = 0 + pysource.SourceWalker.__init__(self, version=version, out=StringIO(), + scanner=scanner, + showast=showast, debug_parser=debug_parser, + compile_mode=compile_mode, is_pypy=is_pypy) # hide_internal suppresses displaying the additional instructions that sometimes # exist in code but but were not written in the source code. @@ -150,12 +131,13 @@ class FragmentsWalker(pysource.SourceWalker, object): # deparsing we generally do need to see these instructions since we may be stopped # at one. So here we do not want to suppress showing such instructions. self.hide_internal = False - - self.name = None - self.offsets = {} self.last_finish = -1 + # FIXME: is there a better way? 
+ global MAP_DIRECT_FRAGMENT + MAP_DIRECT_FRAGMENT = dict(TABLE_DIRECT, **TABLE_DIRECT_FRAGMENT), + f = property(lambda s: s.params['f'], lambda s, x: s.params.__setitem__('f', x), lambda s: s.params.__delitem__('f'), @@ -339,7 +321,7 @@ class FragmentsWalker(pysource.SourceWalker, object): self.preorder(node[0]) finish = len(self.f.getvalue()) if hasattr(node[0], 'offset'): - self.set_pos_info(node[0], self.last_finish, ) + self.set_pos_info(node[0], start, len(self.f.getvalue())) self.write(')') self.last_finish = finish + 1 else: @@ -534,7 +516,7 @@ class FragmentsWalker(pysource.SourceWalker, object): self.write(func_name) self.indentMore() - self.make_function(node, isLambda=False, code_index=code_index) + self.make_function(node, isLambda=False, code=code) self.set_pos_info(node, start, len(self.f.getvalue())) @@ -1613,7 +1595,7 @@ class FragmentsWalker(pysource.SourceWalker, object): self.set_pos_info(last_node, startnode_start, self.last_finish) return - def make_function(self, node, isLambda, nested=1, code_index=-2): + def make_function(self, node, isLambda, nested=1, code=None): """Dump function defintion, doc string, and function body.""" def build_param(ast, name, default): @@ -1664,7 +1646,7 @@ class FragmentsWalker(pysource.SourceWalker, object): if self.version > 3.0 and isLambda and iscode(node[-3].attr): code = node[-3].attr else: - code = node[code_index].attr + code = code.attr assert iscode(code) code = Code(code, self.scanner, self.currentclass) @@ -1748,7 +1730,7 @@ class FragmentsWalker(pysource.SourceWalker, object): pass def deparse_code(version, co, out=StringIO(), showasm=False, showast=False, - showgrammar=False): + showgrammar=False, is_pypy=False): """ Convert the code object co into a python source fragment. @@ -1774,7 +1756,7 @@ def deparse_code(version, co, out=StringIO(), showasm=False, showast=False, assert iscode(co) # store final output stream for case of error - scanner = get_scanner(version) + scanner = get_scanner(version, is_pypy=is_pypy) tokens, customize = scanner.disassemble(co) @@ -1816,10 +1798,10 @@ def deparse_code(version, co, out=StringIO(), showasm=False, showast=False, if __name__ == '__main__': - def deparse_test(co): + def deparse_test(co, is_pypy=IS_PYPY): sys_version = sys.version_info.major + (sys.version_info.minor / 10.0) walk = deparse_code(sys_version, co, showasm=False, showast=False, - showgrammar=False) + showgrammar=False, is_pypy=IS_PYPY) print("deparsed source") print(walk.text, "\n") print('------------------------') diff --git a/uncompyle6/semantics/pysource.py b/uncompyle6/semantics/pysource.py index b00625f8..9a1c4731 100644 --- a/uncompyle6/semantics/pysource.py +++ b/uncompyle6/semantics/pysource.py @@ -140,18 +140,6 @@ TABLE_R = { # 'EXEC_STMT': ( '%|exec %c in %[1]C\n', 0, (0,maxint,', ') ), } -if not PYTHON3: - TABLE_R.update({ - 'STORE_SLICE+0': ( '%c[:]', 0 ), - 'STORE_SLICE+1': ( '%c[%p:]', 0, (1, 100) ), - 'STORE_SLICE+2': ( '%c[:%p]', 0, (1, 100) ), - 'STORE_SLICE+3': ( '%c[%p:%p]', 0, (1, 100), (2, 100) ), - 'DELETE_SLICE+0': ( '%|del %c[:]\n', 0 ), - 'DELETE_SLICE+1': ( '%|del %c[%c:]\n', 0, 1 ), - 'DELETE_SLICE+2': ( '%|del %c[:%c]\n', 0, 1 ), - 'DELETE_SLICE+3': ( '%|del %c[%c:%c]\n', 0, 1, 2 ), - }) - TABLE_R0 = { # 'BUILD_LIST': ( '[%C]', (0,-1,', ') ), # 'BUILD_TUPLE': ( '(%C)', (0,-1,', ') ), @@ -246,7 +234,6 @@ TABLE_DIRECT = { 'assign': ( '%|%c = %p\n', -1, (0, 200) ), 'augassign1': ( '%|%c %c %c\n', 0, 2, 1), 'augassign2': ( '%|%c.%[2]{pattr} %c %c\n', 0, -3, -4), -# 'dup_topx': ( '%c', 0), 
'designList': ( '%c = %c', 0, -1 ), 'and': ( '%c and %c', 0, 2 ), 'ret_and': ( '%c and %c', 0, 2 ), @@ -274,8 +261,6 @@ TABLE_DIRECT = { 'kwargs': ( '%D', (0, maxint, ', ') ), 'importlist2': ( '%C', (0, maxint, ', ') ), - 'assert': ( '%|assert %c\n' , 0 ), - 'assert2': ( '%|assert %c, %c\n' , 0, 3 ), 'assert_expr_or': ( '%c or %c', 0, 2 ), 'assert_expr_and': ( '%c and %c', 0, 2 ), 'print_items_stmt': ( '%|print %c%c,\n', 0, 2), @@ -337,7 +322,6 @@ TABLE_DIRECT = { 'except_cond2': ( '%|except %c as %c:\n', 1, 5 ), 'except_suite': ( '%+%c%-%C', 0, (1, maxint, '') ), 'except_suite_finalize': ( '%+%c%-%C', 1, (3, maxint, '') ), - 'tryfinallystmt': ( '%|try:\n%+%c%-%|finally:\n%+%c%-\n\n', 1, 5 ), 'withstmt': ( '%|with %c:\n%+%c%-', 0, 3), 'withasstmt': ( '%|with %c as %c:\n%+%c%-', 0, 2, 3), 'passstmt': ( '%|pass\n', ), @@ -346,40 +330,6 @@ TABLE_DIRECT = { 'kv2': ( '%c: %c', 1, 2 ), 'mapexpr': ( '{%[1]C}', (0, maxint, ', ') ), - # CE - Fixes for tuples - 'assign2': ( '%|%c, %c = %c, %c\n', 3, 4, 0, 1 ), - 'assign3': ( '%|%c, %c, %c = %c, %c, %c\n', 5, 6, 7, 0, 1, 2 ), - - ####################### - # Python 2.3 Additions - ####################### - - # Import style for 2.0-2.3 - 'importstmt20': ( '%|import %c\n', 1), - 'importstar20': ( '%|from %[1]{pattr} import *\n', ), - 'importfrom20': ( '%|from %[1]{pattr} import %c\n', 2 ), - 'importlist20': ( '%C', (0, maxint, ', ') ), - - ####################### - # Python 2.5 Additions - ####################### - - # Import style for 2.5+ - 'importstmt': ( '%|import %c\n', 2), - 'importstar': ( '%|from %[2]{pattr} import *\n', ), - 'importfrom': ( '%|from %[2]{pattr} import %c\n', 3 ), - 'importmultiple': ( '%|import %c%c\n', 2, 3 ), - 'import_cont' : ( ', %c', 2 ), - - ######################## - # Python 3.2 and 3.3 only - ####################### - 'store_locals': ( '%|# inspect.currentframe().f_locals = __locals__\n', ), - - ######################## - # Python 3.4+ Additions - ####################### - 'LOAD_CLASSDEREF': ( '%{pattr}', ), } @@ -481,7 +431,7 @@ class ParserError(python_parser.ParserError): def __str__(self): lines = ['--- This code section failed: ---'] - lines.extend([i.format() for i in self.tokens]) + lines.extend([str(i) for i in self.tokens]) lines.extend( ['', str(self.error)] ) return '\n'.join(lines) @@ -533,8 +483,8 @@ class SourceWalker(GenericASTTraversal, object): 'indent': '', } self.version = version - self.p = get_python_parser(version, debug_parser=debug_parser, - compile_mode=compile_mode) + self.p = get_python_parser(version, debug_parser=dict(debug_parser), + compile_mode=compile_mode, is_pypy=is_pypy) self.debug_parser = dict(debug_parser) self.showast = showast self.params = params @@ -556,9 +506,94 @@ class SourceWalker(GenericASTTraversal, object): self.version = version self.is_pypy = is_pypy + self.customize_for_version(is_pypy, version) + return + + @staticmethod + def customize_for_version(is_pypy, version): + if is_pypy: + ######################## + # PyPy changes + ####################### + TABLE_DIRECT.update({ + 'assert_pypy': ( '%|assert %c\n' , 1 ), + 'assert2_pypy': ( '%|assert %c, %c\n' , 1, 4 ), + 'trystmt_pypy': ( '%|try:\n%+%c%-%c\n\n', 1, 2 ), + 'tryfinallystmt_pypy': ( '%|try:\n%+%c%-%|finally:\n%+%c%-\n\n', 1, 3 ), + 'assign3_pypy': ( '%|%c, %c, %c = %c, %c, %c\n', 5, 4, 3, 0, 1, 2 ), + 'assign2_pypy': ( '%|%c, %c = %c, %c\n', 3, 2, 0, 1), + }) + else: + ######################## + # Without PyPy + ####################### + TABLE_DIRECT.update({ + 'assert': ( '%|assert %c\n' , 0 ), + 'assert2': 
( '%|assert %c, %c\n' , 0, 3 ), + 'trystmt': ( '%|try:\n%+%c%-%c\n\n', 1, 3 ), + 'tryfinallystmt': ( '%|try:\n%+%c%-%|finally:\n%+%c%-\n\n', 1, 5 ), + 'assign2': ( '%|%c, %c = %c, %c\n', 3, 4, 0, 1 ), + 'assign3': ( '%|%c, %c, %c = %c, %c, %c\n', 5, 6, 7, 0, 1, 2 ), + }) + if version < 3.0: + TABLE_R.update({ + 'STORE_SLICE+0': ( '%c[:]', 0 ), + 'STORE_SLICE+1': ( '%c[%p:]', 0, (1, 100) ), + 'STORE_SLICE+2': ( '%c[:%p]', 0, (1, 100) ), + 'STORE_SLICE+3': ( '%c[%p:%p]', 0, (1, 100), (2, 100) ), + 'DELETE_SLICE+0': ( '%|del %c[:]\n', 0 ), + 'DELETE_SLICE+1': ( '%|del %c[%c:]\n', 0, 1 ), + 'DELETE_SLICE+2': ( '%|del %c[:%c]\n', 0, 1 ), + 'DELETE_SLICE+3': ( '%|del %c[%c:%c]\n', 0, 1, 2 ), + }) + if 2.0 <= version <= 2.3: - TABLE_DIRECT['tryfinallystmt'] = ( - '%|try:\n%+%c%-%|finally:\n%+%c%-\n\n', 1, 4 ) + TABLE_DIRECT.update({ + 'tryfinallystmt': ( '%|try:\n%+%c%-%|finally:\n%+%c%-\n\n', 1, 4 ) + }) + ########################### + # Import style for 2.0-2.3 + ########################### + TABLE_DIRECT.update({ + 'importstmt20': ( '%|import %c\n', 1), + 'importstar20': ( '%|from %[1]{pattr} import *\n', ), + 'importfrom20': ( '%|from %[1]{pattr} import %c\n', 2 ), + 'importlist20': ( '%C', (0, maxint, ', ') ), + }) + elif version >= 2.5: + ######################## + # Import style for 2.5+ + ######################## + TABLE_DIRECT.update({ + 'importstmt': ( '%|import %c\n', 2), + 'importstar': ( '%|from %[2]{pattr} import *\n', ), + 'importfrom': ( '%|from %[2]{pattr} import %c\n', 3 ), + 'importmultiple': ( '%|import %c%c\n', 2, 3 ), + 'import_cont' : ( ', %c', 2 ), + }) + + ########################## + # Python 3.2 and 3.3 only + ########################## + if 3.2 <= version <= 3.3: + TABLE_DIRECT.update({ + 'store_locals': ( '%|# inspect.currentframe().f_locals = __locals__\n', ), + }) + elif version >= 3.4: + ######################## + # Python 3.4+ Additions + ####################### + TABLE_DIRECT.update({ + 'LOAD_CLASSDEREF': ( '%{pattr}', ), + }) + if version >= 3.6: + ######################## + # Python 3.6+ Additions + ####################### + TABLE_DIRECT.update({ + 'formatted_value': ( '{%c}', 0), + 'joined_str': ( "f'%c'", 2), + }) return f = property(lambda s: s.params['f'], @@ -712,11 +747,6 @@ class SourceWalker(GenericASTTraversal, object): if self.return_none or node != AST('return_stmt', [AST('ret_expr', [NONE]), Token('RETURN_VALUE')]): self.write(' ') self.preorder(node[0]) - # 3.5 does jump optimization. The RETURN_END_IF in the return - # statement means to dedent. Earlier versions will just have - # RETURN_VALUE it is done by a nonterminal in the grammar. 
- if self.version >= 3.5 and node[-1] == 'RETURN_END_IF': - self.indentLess() self.println() self.prune() # stop recursing @@ -814,6 +844,10 @@ class SourceWalker(GenericASTTraversal, object): self.prec += 1 self.prune() + def n_str(self, node): + self.write(node[0].pattr) + self.prune() + def n_LOAD_CONST(self, node): data = node.pattr; datatype = type(data) if isinstance(datatype, int) and data == minint: @@ -1003,7 +1037,7 @@ class SourceWalker(GenericASTTraversal, object): self.write(func_name) self.indentMore() - self.make_function(node, isLambda=False, code=code) + self.make_function(node, isLambda=False, codeNode=code) if len(self.param_stack) > 1: self.write('\n\n') @@ -1013,7 +1047,7 @@ class SourceWalker(GenericASTTraversal, object): self.prune() # stop recursing def n_mklambda(self, node): - self.make_function(node, isLambda=True, code=node[-2]) + self.make_function(node, isLambda=True, codeNode=node[-2]) self.prune() # stop recursing def n_list_compr(self, node): @@ -1061,9 +1095,16 @@ class SourceWalker(GenericASTTraversal, object): """ p = self.prec self.prec = 27 - n = node[-2] if self.is_pypy and node[-1] == 'JUMP_BACK' else node[-1] + if node[-1].type == 'list_iter': + n = node[-1] + elif self.is_pypy and node[-1] == 'JUMP_BACK': + n = node[-2] list_expr = node[0] - designator = node[3] + + if len(node) >= 3: + designator = node[3] + elif self.is_pypy and n[0] == 'list_for': + designator = n[0][2] assert n == 'list_iter' assert designator == 'designator' @@ -1168,7 +1209,7 @@ class SourceWalker(GenericASTTraversal, object): self.write('{') if node[0] in ['LOAD_SETCOMP', 'LOAD_DICTCOMP']: self.comprehension_walk3(node, 1, 0) - elif node[0].type == 'load_closure': + elif node[0].type == 'load_closure' and self.version >= 3.0: self.setcomprehension_walk3(node, collection_index=4) else: self.comprehension_walk(node, iter_index=4) @@ -1514,7 +1555,7 @@ class SourceWalker(GenericASTTraversal, object): sep = INDENT_PER_LEVEL[:-1] self.write('{') - if self.version >= 3.0: + if self.version >= 3.0 and not self.is_pypy: if node[0].type.startswith('kvlist'): # Python 3.5+ style key/value list in mapexpr kv_node = node[0] @@ -1730,7 +1771,7 @@ class SourceWalker(GenericASTTraversal, object): # Is there some sort of invalid bounds access going on? if isinstance(entry[arg], int): self.preorder(node[entry[arg]]) - arg += 1 + arg += 1 elif typ == 'p': p = self.prec (index, self.prec) = entry[arg] @@ -1801,13 +1842,18 @@ class SourceWalker(GenericASTTraversal, object): def customize(self, customize): """ - Special handling for opcodes that take a variable number + Special handling for opcodes, such as those that take a variable number of arguments -- we add a new entry for each in TABLE_R. 
""" for k, v in list(customize.items()): if k in TABLE_R: continue op = k[ :k.rfind('_') ] + + if k == 'CALL_METHOD': + # This happens in PyPy only + TABLE_R[k] = ('%c(%P)', 0, (1, -1, ', ', 100)) + if op == 'CALL_FUNCTION': TABLE_R[k] = ('%c(%P)', 0, (1, -1, ', ', 100)) elif op in ('CALL_FUNCTION_VAR', @@ -1872,7 +1918,7 @@ class SourceWalker(GenericASTTraversal, object): # return self.traverse(node[1]) raise Exception("Can't find tuple parameter " + name) - def make_function(self, node, isLambda, nested=1, code=None): + def make_function(self, node, isLambda, nested=1, codeNode=None): """Dump function defintion, doc string, and function body.""" def build_param(ast, name, default): @@ -1913,12 +1959,12 @@ class SourceWalker(GenericASTTraversal, object): pos_args, kw_args, annotate_args = args_node.attr else: defparams = node[:args_node.attr] - kw_args, annotate_args = (0, 0) + kw_args = 0 pass if 3.0 <= self.version <= 3.2: lambda_index = -2 - elif 3.03<= self.version: + elif 3.03 <= self.version: lambda_index = -3 else: lambda_index = None @@ -1927,7 +1973,7 @@ class SourceWalker(GenericASTTraversal, object): assert node[lambda_index].type == 'LOAD_LAMBDA' code = node[lambda_index].attr else: - code = code.attr + code = codeNode.attr assert iscode(code) code = Code(code, self.scanner, self.currentclass) diff --git a/uncompyle6/show.py b/uncompyle6/show.py index ff16d12e..58cce4c7 100644 --- a/uncompyle6/show.py +++ b/uncompyle6/show.py @@ -14,7 +14,7 @@ def maybe_show_asm(showasm, tokens): if showasm: stream = showasm if hasattr(showasm, 'write') else sys.stdout for t in tokens: - stream.write(t.format()) + stream.write(str(t)) stream.write('\n') @@ -30,7 +30,7 @@ def maybe_show_ast(showast, ast): """ if showast: stream = showast if hasattr(showast, 'write') else sys.stdout - stream.write(repr(ast)) + stream.write(str(ast)) stream.write('\n') diff --git a/uncompyle6/verify.py b/uncompyle6/verify.py index 4411cbe0..d18719dd 100755 --- a/uncompyle6/verify.py +++ b/uncompyle6/verify.py @@ -1,6 +1,6 @@ # # (C) Copyright 2000-2002 by hartmut Goebel -# (C) Copyright 2015 by Rocky Bernstein +# (C) Copyright 2015-2016 by Rocky Bernstein # """ byte-code verification @@ -132,7 +132,7 @@ class CmpErrorMember(VerifyCmpError): # these members are ignored __IGNORE_CODE_MEMBERS__ = ['co_filename', 'co_firstlineno', 'co_lnotab', 'co_stacksize', 'co_names'] -def cmp_code_objects(version, code_obj1, code_obj2, name=''): +def cmp_code_objects(version, is_pypy, code_obj1, code_obj2, name=''): """ Compare two code-objects. 
@@ -193,11 +193,19 @@ def cmp_code_objects(version, code_obj1, code_obj2, name=''): import uncompyle6.scanners.scanner26 as scan scanner = scan.Scanner26() elif version == 2.7: - import uncompyle6.scanners.scanner27 as scan - scanner = scan.Scanner27() + if is_pypy: + import uncompyle6.scanners.pypy27 as scan + scanner = scan.ScannerPyPy27(show_asm=False) + else: + import uncompyle6.scanners.scanner27 as scan + scanner = scan.Scanner27() elif version == 3.2: - import uncompyle6.scanners.scanner32 as scan - scanner = scan.Scanner32() + if is_pypy: + import uncompyle6.scanners.pypy32 as scan + scanner = scan.ScannerPyPy32() + else: + import uncompyle6.scanners.scanner32 as scan + scanner = scan.Scanner32() elif version == 3.3: import uncompyle6.scanners.scanner33 as scan scanner = scan.Scanner33() @@ -326,7 +334,7 @@ def cmp_code_objects(version, code_obj1, code_obj2, name=''): codes2 = ( c for c in code_obj2.co_consts if hasattr(c, 'co_consts') ) for c1, c2 in zip(codes1, codes2): - cmp_code_objects(version, c1, c2, name=name) + cmp_code_objects(version, is_pypy, c1, c2, name=name) else: # all other members must be equal if getattr(code_obj1, member) != getattr(code_obj2, member): @@ -336,13 +344,8 @@ def cmp_code_objects(version, code_obj1, code_obj2, name=''): class Token(scanner.Token): """Token class with changed semantics for 'cmp()'.""" - def __cmp__(self, o): t = self.type # shortcut - loads = ('LOAD_NAME', 'LOAD_GLOBAL', 'LOAD_CONST') - if t in loads and o.type in loads: - if self.pattr == 'None' and o.pattr is None: - return 0 if t == 'BUILD_TUPLE_0' and o.type == 'LOAD_CONST' and o.pattr == (): return 0 if t == 'COME_FROM' == o.type: @@ -373,14 +376,14 @@ def compare_code_with_srcfile(pyc_filename, src_filename): % (PYTHON_MAGIC_INT, magic_int)) return msg code_obj2 = load_file(src_filename) - cmp_code_objects(version, code_obj1, code_obj2) + cmp_code_objects(version, is_pypy, code_obj1, code_obj2) return None def compare_files(pyc_filename1, pyc_filename2): """Compare two .pyc files.""" version, timestamp, magic_int1, code_obj1, is_pypy = uncompyle6.load_module(pyc_filename1) version, timestamp, magic_int2, code_obj2, is_pypy = uncompyle6.load_module(pyc_filename2) - cmp_code_objects(version, code_obj1, code_obj2) + cmp_code_objects(version, is_pypy, code_obj1, code_obj2) if __name__ == '__main__': t1 = Token('LOAD_CONST', None, 'code_object _expandLang', 52) diff --git a/uncompyle6/version.py b/uncompyle6/version.py index 37b40926..6525b7de 100644 --- a/uncompyle6/version.py +++ b/uncompyle6/version.py @@ -1,3 +1,3 @@ # This file is suitable for sourcing inside bash as # well as importing into Python -VERSION='2.7.0' +VERSION='2.8.0' From d030a04c1a1ea9bb38e382b17c69b27bee9716e3 Mon Sep 17 00:00:00 2001 From: DanielBradburn Date: Wed, 10 Aug 2016 22:20:43 +0200 Subject: [PATCH 3/8] added hypothesis test (currently failing due to limited support) for testing fstring uncompyling --- pytest/test_fstring.py | 109 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) create mode 100644 pytest/test_fstring.py diff --git a/pytest/test_fstring.py b/pytest/test_fstring.py new file mode 100644 index 00000000..6222cc73 --- /dev/null +++ b/pytest/test_fstring.py @@ -0,0 +1,109 @@ +import hypothesis +from hypothesis import strategies as st + + +@st.composite +def expressions(draw): + # todo : would be nice to generate expressions using hypothesis however + # this is pretty involved so for now just use a corpus of expressions + # from which to select. 
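As the todo above notes, expressions come from a small hand-picked corpus (the sampled_from call just below) rather than being generated structurally. When tweaking such a strategy, hypothesis's .example() helper is a quick way to eyeball what it produces; it is meant for interactive exploration only, not for use inside tests.

    # Interactive-only sketch; hypothesis discourages .example() inside tests.
    from hypothesis import strategies as st

    corpus = st.sampled_from(('abc', 'len(items)', 'x + 1', 'lineno'))
    print(corpus.example())   # e.g. 'x + 1'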
+ return draw(st.sampled_from(( + 'abc', + 'len(items)', + 'x + 1', + 'lineno', + ))) + + +@st.composite +def format_specifiers(draw): + """ + Generate a valid format specifier using the rules: + + format_spec ::= [[fill]align][sign][#][0][width][,][.precision][type] + fill ::= + align ::= "<" | ">" | "=" | "^" + sign ::= "+" | "-" | " " + width ::= integer + precision ::= integer + type ::= "b" | "c" | "d" | "e" | "E" | "f" | "F" | "g" | "G" | "n" | "o" | "s" | "x" | "X" | "%" + + See https://docs.python.org/2/library/string.html + + :param draw: Let hypothesis draw from other strategies. + + :return: An example format_specifier. + """ + alphabet_strategy = st.characters(min_codepoint=ord('a'), max_codepoint=ord('z')) + fill = draw(st.one_of(alphabet_strategy, st.none())) + align = draw(st.sampled_from(list('<>=^'))) + fill_align = (fill + align or '') if fill else '' + + type_ = draw(st.sampled_from('bcdeEfFgGnosxX%')) + can_have_sign = type_ in 'deEfFgGnoxX%' + can_have_comma = type_ in 'deEfFgG%' + can_have_precision = type_ in 'fFgG' + can_have_pound = type_ in 'boxX%' + can_have_zero = type_ in 'oxX' + + sign = draw(st.sampled_from(list('+- ') + [''])) if can_have_sign else '' + pound = draw(st.sampled_from(('#', '',))) if can_have_pound else '' + zero = draw(st.sampled_from(('0', '',))) if can_have_zero else '' + + int_strategy = st.integers(min_value=1, max_value=1000) + + width = draw(st.one_of(int_strategy, st.none())) + width = str(width) if width is not None else '' + + comma = draw(st.sampled_from((',', '',))) if can_have_comma else '' + if can_have_precision: + precision = draw(st.one_of(int_strategy, st.none())) + precision = '.' + str(precision) if precision else '' + else: + precision = '' + + return ''.join((fill_align, sign, pound, zero, width, comma, precision, type_,)) + + +@st.composite +def fstrings(draw): + """ + Generate a valid fstring. + See https://www.python.org/dev/peps/pep-0498/#specification + + :param draw: Let hypothsis draw from other strategies. + + :return: A valid f string. 
+ """ + + prefix = draw(st.sampled_from('fF')) + raw = draw(st.sampled_from(('', 'rR',))) + quote_char = draw(st.sampled_from(("'", '"', "'''", '"""',))) + + integer_strategy = st.integers(min_value=0, max_value=3) + expression_count = draw(integer_strategy) + content = [] + for _ in range(expression_count): + expression = draw(expressions()) + conversion = draw(st.sampled_from(('', '!s', '!r', '!a',))) + specifier = draw(format_specifiers()) + content.append(f'{expression}{specifier}{conversion}') + content = ''.join(content) + + return f'{prefix}{raw}{quote_char}{content}{quote_char}' + + +@hypothesis.given(format_specifiers()) +def test_format_specifiers(format_specifier): + """Verify that format_specifiers generates valid specifiers""" + try: + exec('"{:' + format_specifier + '}".format(0)') + except ValueError as e: + if 'Unknown format code' not in str(e): + raise + + +@hypothesis.given(fstrings()) +def test_uncompyle_fstring(fstring): + """Verify uncompyle fstring bytecode""" + assert not fstring From e06a90ed27f3e453ab647520932ff616de785e0b Mon Sep 17 00:00:00 2001 From: DanielBradburn Date: Wed, 10 Aug 2016 22:27:54 +0200 Subject: [PATCH 4/8] added hypothesis to requirements-dev --- pytest/test_fstring.py | 34 +++++++++++++++++++++------------- requirements-dev.txt | 1 + 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/pytest/test_fstring.py b/pytest/test_fstring.py index 6222cc73..97148775 100644 --- a/pytest/test_fstring.py +++ b/pytest/test_fstring.py @@ -1,5 +1,8 @@ +# test import hypothesis from hypothesis import strategies as st +# uncompyle6 +from uncompyle6 import PYTHON_VERSION, deparse_code @st.composite @@ -12,6 +15,12 @@ def expressions(draw): 'len(items)', 'x + 1', 'lineno', + 'container', + 'self.attribute', + 'self.method()', + 'sorted(items, key=lambda x: x.name)', + 'func(*args, **kwargs)', + 'text or default', ))) @@ -68,30 +77,26 @@ def format_specifiers(draw): @st.composite def fstrings(draw): """ - Generate a valid fstring. + Generate a valid f-string. See https://www.python.org/dev/peps/pep-0498/#specification :param draw: Let hypothsis draw from other strategies. - :return: A valid f string. + :return: A valid f-string. 
""" - prefix = draw(st.sampled_from('fF')) - raw = draw(st.sampled_from(('', 'rR',))) - quote_char = draw(st.sampled_from(("'", '"', "'''", '"""',))) - + raw = draw(st.sampled_from(list('rR') + [''])) integer_strategy = st.integers(min_value=0, max_value=3) expression_count = draw(integer_strategy) content = [] for _ in range(expression_count): expression = draw(expressions()) - conversion = draw(st.sampled_from(('', '!s', '!r', '!a',))) - specifier = draw(format_specifiers()) - content.append(f'{expression}{specifier}{conversion}') + #conversion = draw(st.sampled_from(('', '!s', '!r', '!a',))) + #specifier = draw(st.sampled_from(format_specifiers(), st.just(''))) + content.append(f'{{{expression}}}') content = ''.join(content) - return f'{prefix}{raw}{quote_char}{content}{quote_char}' - + return f"{prefix}{raw}'{content}'" @hypothesis.given(format_specifiers()) def test_format_specifiers(format_specifier): @@ -105,5 +110,8 @@ def test_format_specifiers(format_specifier): @hypothesis.given(fstrings()) def test_uncompyle_fstring(fstring): - """Verify uncompyle fstring bytecode""" - assert not fstring + """Verify uncompyling fstring bytecode""" + hypothesis.assume('{' in fstring) # ignore fstring with no expressions + expr = f'{fstring}\n' + code = compile(expr, '', 'single') + assert deparse_code(PYTHON_VERSION, code, compile_mode='single').text == expr diff --git a/requirements-dev.txt b/requirements-dev.txt index cb64874e..26090747 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,2 +1,3 @@ pytest flake8 +hypothesis \ No newline at end of file From 7fba24198fb3a7d8d284e4c4df21543c0865c59e Mon Sep 17 00:00:00 2001 From: DanielBradburn Date: Wed, 10 Aug 2016 22:29:02 +0200 Subject: [PATCH 5/8] small formatting change --- pytest/test_fstring.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pytest/test_fstring.py b/pytest/test_fstring.py index 97148775..2947337b 100644 --- a/pytest/test_fstring.py +++ b/pytest/test_fstring.py @@ -98,6 +98,7 @@ def fstrings(draw): return f"{prefix}{raw}'{content}'" + @hypothesis.given(format_specifiers()) def test_format_specifiers(format_specifier): """Verify that format_specifiers generates valid specifiers""" From 2bd850f297f2863d57505f34815d7822bc302dd8 Mon Sep 17 00:00:00 2001 From: DanielBradburn Date: Sun, 14 Aug 2016 20:44:23 +0200 Subject: [PATCH 6/8] added examples for known failures --- pytest/test_fstring.py | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/pytest/test_fstring.py b/pytest/test_fstring.py index 2947337b..9fb38e3f 100644 --- a/pytest/test_fstring.py +++ b/pytest/test_fstring.py @@ -1,4 +1,5 @@ # test +import pytest import hypothesis from hypothesis import strategies as st # uncompyle6 @@ -84,8 +85,8 @@ def fstrings(draw): :return: A valid f-string. 
""" - prefix = draw(st.sampled_from('fF')) - raw = draw(st.sampled_from(list('rR') + [''])) + prefix = draw(st.sampled_from('f')) + raw = draw(st.sampled_from(list('r') + [''])) integer_strategy = st.integers(min_value=0, max_value=3) expression_count = draw(integer_strategy) content = [] @@ -93,10 +94,10 @@ def fstrings(draw): expression = draw(expressions()) #conversion = draw(st.sampled_from(('', '!s', '!r', '!a',))) #specifier = draw(st.sampled_from(format_specifiers(), st.just(''))) - content.append(f'{{{expression}}}') + content.append('{{{}}}'.format(expression)) content = ''.join(content) - return f"{prefix}{raw}'{content}'" + return "{}{}'{}'".format(prefix, raw, content) @hypothesis.given(format_specifiers()) @@ -109,10 +110,20 @@ def test_format_specifiers(format_specifier): raise +@pytest.mark.skipif(PYTHON_VERSION < 3.6, reason="requires python3.6") @hypothesis.given(fstrings()) +@hypothesis.example("f'{abc}'") # BUG: strings with a single expression do not uncompyle correctly. +@hypothesis.example("fr'{abc}{xyz}'") # BUG: no support for raw f strings. +@hypothesis.example("f'{len(items)}{abc}'") # BUG: more complicated expressions than LOAD_NAME don't work def test_uncompyle_fstring(fstring): """Verify uncompyling fstring bytecode""" - hypothesis.assume('{' in fstring) # ignore fstring with no expressions - expr = f'{fstring}\n' + + # ignore fstring with no expressions an fsring with + # no expressions just gets compiled to a normal string. + hypothesis.assume('{' in fstring) + + expr = fstring + '\n' code = compile(expr, '', 'single') - assert deparse_code(PYTHON_VERSION, code, compile_mode='single').text == expr + deparsed = deparse_code(PYTHON_VERSION, code, compile_mode='single') + + assert deparsed.text == expr From 004ce5c49143431cbc79afc6ea5a46a54e97a639 Mon Sep 17 00:00:00 2001 From: DanielBradburn Date: Tue, 16 Aug 2016 08:24:06 +0200 Subject: [PATCH 7/8] Fixed bug with FORMAT_VALUE where a sub expression would not be correctly interpreted --- pytest/test_fstring.py | 28 +++++++++++++++++----------- uncompyle6/parsers/parse3.py | 5 +++-- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/pytest/test_fstring.py b/pytest/test_fstring.py index 9fb38e3f..cc11a8d5 100644 --- a/pytest/test_fstring.py +++ b/pytest/test_fstring.py @@ -1,3 +1,5 @@ +# std +import os # test import pytest import hypothesis @@ -85,19 +87,20 @@ def fstrings(draw): :return: A valid f-string. 
""" - prefix = draw(st.sampled_from('f')) - raw = draw(st.sampled_from(list('r') + [''])) + is_raw = draw(st.booleans()) integer_strategy = st.integers(min_value=0, max_value=3) expression_count = draw(integer_strategy) content = [] for _ in range(expression_count): expression = draw(expressions()) - #conversion = draw(st.sampled_from(('', '!s', '!r', '!a',))) - #specifier = draw(st.sampled_from(format_specifiers(), st.just(''))) - content.append('{{{}}}'.format(expression)) + # not yet : conversion not supported + conversion = ''#draw(st.sampled_from(('', '!s', '!r', '!a',))) + has_specifier = draw(st.booleans()) + specifier = ':' + draw(format_specifiers()) if has_specifier else '' + content.append('{{{}{}}}'.format(expression, conversion, specifier)) content = ''.join(content) - return "{}{}'{}'".format(prefix, raw, content) + return "f{}'{}'".format('r' if is_raw else '', content) @hypothesis.given(format_specifiers()) @@ -110,11 +113,8 @@ def test_format_specifiers(format_specifier): raise -@pytest.mark.skipif(PYTHON_VERSION < 3.6, reason="requires python3.6") +@pytest.mark.skipif(PYTHON_VERSION < 3.6, reason='need at least python 3.6') @hypothesis.given(fstrings()) -@hypothesis.example("f'{abc}'") # BUG: strings with a single expression do not uncompyle correctly. -@hypothesis.example("fr'{abc}{xyz}'") # BUG: no support for raw f strings. -@hypothesis.example("f'{len(items)}{abc}'") # BUG: more complicated expressions than LOAD_NAME don't work def test_uncompyle_fstring(fstring): """Verify uncompyling fstring bytecode""" @@ -122,8 +122,14 @@ def test_uncompyle_fstring(fstring): # no expressions just gets compiled to a normal string. hypothesis.assume('{' in fstring) + # BUG : At the moment a single expression is not supported + # for example f'{abc}'. 
+ hypothesis.assume(fstring.count('{') > 1) + expr = fstring + '\n' code = compile(expr, '', 'single') deparsed = deparse_code(PYTHON_VERSION, code, compile_mode='single') + recompiled = compile(deparsed.text, '', 'single') - assert deparsed.text == expr + if recompiled != code: + assert deparsed.text == expr diff --git a/uncompyle6/parsers/parse3.py b/uncompyle6/parsers/parse3.py index 7d70e9d8..f7f88f2b 100644 --- a/uncompyle6/parsers/parse3.py +++ b/uncompyle6/parsers/parse3.py @@ -461,8 +461,8 @@ class Python3Parser(PythonParser): elif opname == 'FORMAT_VALUE': # Python 3.6+ self.addRule(""" - formatted_value ::= LOAD_FAST FORMAT_VALUE - formatted_value ::= LOAD_NAME FORMAT_VALUE + formatted_value ::= expr FORMAT_VALUE + formatted_value ::= expr FORMAT_VALUE str ::= LOAD_CONST formatted_value_or_str ::= formatted_value formatted_value_or_str ::= str @@ -492,6 +492,7 @@ class Python3Parser(PythonParser): rule = ('load_closure ::= %s%s' % (('LOAD_CLOSURE ' * v), opname)) self.add_unique_rule(rule, opname, token.attr, customize) if opname_base == 'BUILD_LIST' and saw_format_value: + saw_format_value = False format_or_str_n = "formatted_value_or_str_%s" % v self.addRule(""" expr ::= joined_str From 5ab3e52c9c94e9ec3b34d38a439d99f634caabea Mon Sep 17 00:00:00 2001 From: moagstar Date: Tue, 16 Aug 2016 08:38:38 +0200 Subject: [PATCH 8/8] disable test_format_specifiers on python < 3.6 since this is only required for the fstring tests and was causing failures on 2.7 --- pytest/test_fstring.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pytest/test_fstring.py b/pytest/test_fstring.py index cc11a8d5..0b6329e7 100644 --- a/pytest/test_fstring.py +++ b/pytest/test_fstring.py @@ -103,6 +103,7 @@ def fstrings(draw): return "f{}'{}'".format('r' if is_raw else '', content) +@pytest.mark.skipif(PYTHON_VERSION < 3.6, reason='need at least python 3.6') @hypothesis.given(format_specifiers()) def test_format_specifiers(format_specifier): """Verify that format_specifiers generates valid specifiers"""
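To reproduce by hand the round trip that test_uncompyle_fstring automates, something like the following should work on CPython 3.6. This is a hedged sketch; whether the deparsed text matches the source depends on the still-incomplete 3.6 support described in the patches above.

    # Requires CPython 3.6; the names abc/xyz are only compiled, never evaluated.
    import dis
    from uncompyle6 import PYTHON_VERSION, deparse_code

    src = "f'{abc}{xyz}'\n"
    code = compile(src, '<fstring>', 'single')
    dis.dis(code)   # 3.6 emits FORMAT_VALUE for each braced expression
    deparsed = deparse_code(PYTHON_VERSION, code, compile_mode='single')
    print(deparsed.text)   # ideally prints the original f-string source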