diff --git a/ChangeLog b/ChangeLog index c26d158a..a66b0c8b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,120 @@ +2015-12-27 rocky + + * README.rst, test/bytecompile-tests, uncompyle6/opcodes/Makefile, + uncompyle6/opcodes/opcode_23.py, uncompyle6/opcodes/opcode_24.py, + uncompyle6/opcodes/opcode_25.py, uncompyle6/opcodes/opcode_26.py, + uncompyle6/opcodes/opcode_27.py, uncompyle6/opcodes/opcode_32.py, + uncompyle6/opcodes/opcode_33.py, uncompyle6/opcodes/opcode_34.py, + uncompyle6/parser.py, uncompyle6/parsers/parse3.py, + uncompyle6/scanner.py, uncompyle6/scanners/scanner25.py, + uncompyle6/scanners/scanner26.py, uncompyle6/scanners/scanner27.py, + uncompyle6/scanners/scanner3.py, uncompyle6/scanners/scanner32.py, + uncompyle6/scanners/scanner33.py, uncompyle6/scanners/scanner34.py, + uncompyle6/semantics/fragments.py, uncompyle6/semantics/pysource.py: + DRY Python3 scanner code. Some cross version handling fixed. Some + Python 3.2 and 3.3 deparse fixes. + +2015-12-26 rocky + + * .travis.yml, test/Makefile, uncompyle6/verify.py: Running native + on Python 3.3 needs more work + +2015-12-26 rocky + + * test/Makefile, test/test_pythonlib.py: Add ok-2.7 tests for 3.4 + full testing + +2015-12-26 rocky + + * test/Makefile, test/bytecompile-tests, test/test_pythonlib.py: Add + verify tests. Add Python 2.6 bytecode and use. 
+ +2015-12-26 rocky + + * uncompyle6/semantics/fragments.py, + uncompyle6/semantics/pysource.py: Add node and template code to + cleanup "for" handling + +2015-12-26 rocky + + * .travis.yml: Try Python 2.6 testing on travis + +2015-12-26 rocky + + * test/Makefile: For testing we can't use 3.3 bytecodes on 2.7 yet, so + use 3.2 + +2015-12-26 rocky + + * .travis.yml, Makefile, requirements-dev.txt, test/Makefile, + test/bytecompile-tests, test/test_pythonlib.py, + uncompyle6/__init__.py, uncompyle6/opcodes/opcode_32.py, + uncompyle6/opcodes/opcode_33.py, uncompyle6/opcodes/opcode_34.py, + uncompyle6/scanner.py, uncompyle6/scanners/scanner32.py, + uncompyle6/scanners/scanner33.py, uncompyle6/scanners/scanner34.py, + uncompyle6/semantics/pysource.py: Fix up Python 3.2, 3.3, and 3.4 + cross-version scanners. Try travis 2.6 and 3.3 + +2015-12-26 rocky + + * .travis.yml: Travis: try checking 3.4 + +2015-12-26 rocky + + * test/simple_source/exception/05_try_except.py, + test/simple_source/looping/10_while.py, + test/simple_source/looping/while.py, + test/simple_source/simple_stmts/00_assign.py, + test/simple_source/simple_stmts/00_import.py, + test/simple_source/simple_stmts/00_pass.py, + test/simple_source/simple_stmts/15_assert.py, + test/simple_source/stmts/00_assign.py, + test/simple_source/stmts/00_import.py, + test/simple_source/stmts/00_pass.py, + test/simple_source/stmts/15_assert.py, + test/simple_source/stmts/15_for_if.py, + uncompyle6/parsers/parse2.py, uncompyle6/parsers/parse3.py, + uncompyle6/scanners/scanner32.py, uncompyle6/scanners/scanner34.py: + Fix up looping by reinstating JUMP_ABSOLUTE -> JUMP_BACK or CONTINUE + get jump offsets into jump attributes. Fix up 3.2 scanner partially + and use that in 3.4 for cross version disassembly. 
+ +2015-12-26 rocky + + * test/simple_source/exception/01_try_except.py, + test/simple_source/exception/05_try_except.py, uncompyle6/main.py, + uncompyle6/opcodes/opcode_34.py, uncompyle6/parsers/parse3.py, + uncompyle6/semantics/pysource.py: Python3 try/except handling + improvements. Add Walker exception and use that: fixes erroneous + uncompyle success message on parse error. + +2015-12-25 rocky + + * test/simple_source/exception/01_try_except.py, + uncompyle6/parsers/parse2.py, uncompyle6/parsers/parse3.py: WIP redo + try/except for Python3 + +2015-12-25 rocky + + * uncompyle6/semantics/fragments.py, + uncompyle6/semantics/pysource.py: Fix bugs in using pysource from + fragments. + +2015-12-25 rocky + + * uncompyle6/semantics/Makefile, uncompyle6/semantics/fragments.py, + uncompyle6/semantics/pysource.py: Two modes of disassembly, one + where we show hidden code and one where we don't. + +2015-12-25 rocky + + * README.rst: README.rst typos + +2015-12-25 rocky + + * .gitignore, ChangeLog, MANIFEST.in, NEWS, __pkginfo__.py, + test/Makefile: Get ready for release 2.0.0 + 2015-12-25 rocky * pytest/test_deparse.py: Port deparse test from python-deparse to diff --git a/NEWS b/NEWS index b885ce4b..481dc463 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,12 @@ +uncompyle6 2.1.0 2015-12-27 + +- Python 3.x deparsing much more solid +- Better cross-version deparsing + +Some bugs squashed while others run rampant. Some code cleanup while +much more is yet needed. More tests added, but many more are needed. 
+ + uncompyle6 1.0.0 2015-12-11 Changes from uncompyle2 diff --git a/README.rst b/README.rst index e0f0610f..f7e57040 100644 --- a/README.rst +++ b/README.rst @@ -3,7 +3,8 @@ uncompyle6 ========== -A native Python Byte-code Disassembler, Decompiler, and byte-code library +A native Python Byte-code Disassembler, Decompiler, Fragment Decompiler +and byte-code library Introduction @@ -11,7 +12,8 @@ Introduction *uncompyle6* translates Python byte-code back into equivalent Python source code. It accepts byte-codes from Python version 2.5 to 3.4 or -so and has been tested on Python 2.6, 2.7 and Python 3.4. +so and has been tested on Python versions 2.6, 2.7, 3.3 and +3.4. Why this? --------- @@ -83,9 +85,12 @@ for usage help Known Bugs/Restrictions ----------------------- -Python 3 deparsing is getting there, but not solid. Using Python 2 to -deparse Python 3 is problematic, especilly for versions 3.4 and -greater. +Python 2 deparsing is probably as solid as the various versions of +uncompyle2. Python 3 deparsing is not as solid. Using Python 2 to +deparse Python 3 has severe limitations, due to byte code format +differences and the current inability to retrieve code object fields across +different Python versions. (I envy the pycdc C++ code which doesn't have such +problems because it lives totally outside of Python.) 
See Also -------- diff --git a/__pkginfo__.py b/__pkginfo__.py index d2a0348d..b3eeef35 100644 --- a/__pkginfo__.py +++ b/__pkginfo__.py @@ -40,7 +40,7 @@ def get_srcdir(): return os.path.realpath(filename) ns = {} -version = '2.0.0' +version = '2.1.0' web = 'https://github.com/rocky/python-uncompyle6/' # tracebacks in zip files are funky and not debuggable diff --git a/test/bytecode_3.2/add.pyc b/test/bytecode_3.2/add.pyc deleted file mode 100644 index 112809eb..00000000 Binary files a/test/bytecode_3.2/add.pyc and /dev/null differ diff --git a/test/bytecode_3.2/and.pyc b/test/bytecode_3.2/and.pyc deleted file mode 100644 index 0677927d..00000000 Binary files a/test/bytecode_3.2/and.pyc and /dev/null differ diff --git a/test/bytecode_3.2/assign.pyc b/test/bytecode_3.2/assign.pyc deleted file mode 100644 index b071f0c9..00000000 Binary files a/test/bytecode_3.2/assign.pyc and /dev/null differ diff --git a/test/bytecode_3.2/assign_none.pyc b/test/bytecode_3.2/assign_none.pyc deleted file mode 100644 index 51f6d24b..00000000 Binary files a/test/bytecode_3.2/assign_none.pyc and /dev/null differ diff --git a/test/bytecode_3.2/assign_none_str.pyc b/test/bytecode_3.2/assign_none_str.pyc deleted file mode 100644 index 1673aa81..00000000 Binary files a/test/bytecode_3.2/assign_none_str.pyc and /dev/null differ diff --git a/test/bytecode_3.2/divide_floor.pyc b/test/bytecode_3.2/divide_floor.pyc index 24899bdb..336adfec 100644 Binary files a/test/bytecode_3.2/divide_floor.pyc and b/test/bytecode_3.2/divide_floor.pyc differ diff --git a/test/bytecode_3.2/divide_true.pyc b/test/bytecode_3.2/divide_true.pyc index dcaecfb5..705df868 100644 Binary files a/test/bytecode_3.2/divide_true.pyc and b/test/bytecode_3.2/divide_true.pyc differ diff --git a/test/bytecode_3.2/for.pyc b/test/bytecode_3.2/for.pyc deleted file mode 100644 index a37764eb..00000000 Binary files a/test/bytecode_3.2/for.pyc and /dev/null differ diff --git a/test/bytecode_3.2/if.pyc b/test/bytecode_3.2/if.pyc 
deleted file mode 100644 index 60102cb0..00000000 Binary files a/test/bytecode_3.2/if.pyc and /dev/null differ diff --git a/test/bytecode_3.2/ifelse.pyc b/test/bytecode_3.2/ifelse.pyc deleted file mode 100644 index 765fbd09..00000000 Binary files a/test/bytecode_3.2/ifelse.pyc and /dev/null differ diff --git a/test/bytecode_3.2/modulo.pyc b/test/bytecode_3.2/modulo.pyc index 201095ca..21628eb1 100644 Binary files a/test/bytecode_3.2/modulo.pyc and b/test/bytecode_3.2/modulo.pyc differ diff --git a/test/bytecode_3.2/multiply.pyc b/test/bytecode_3.2/multiply.pyc index f18d053f..d1370ea6 100644 Binary files a/test/bytecode_3.2/multiply.pyc and b/test/bytecode_3.2/multiply.pyc differ diff --git a/test/bytecode_3.2/or.pyc b/test/bytecode_3.2/or.pyc deleted file mode 100644 index 386cc391..00000000 Binary files a/test/bytecode_3.2/or.pyc and /dev/null differ diff --git a/test/bytecode_3.2/positional.pyc b/test/bytecode_3.2/positional.pyc deleted file mode 100644 index 66ea7b9c..00000000 Binary files a/test/bytecode_3.2/positional.pyc and /dev/null differ diff --git a/test/bytecode_3.2/power.pyc b/test/bytecode_3.2/power.pyc index 86151859..e64ac4b7 100644 Binary files a/test/bytecode_3.2/power.pyc and b/test/bytecode_3.2/power.pyc differ diff --git a/test/bytecode_3.2/shift_left.pyc b/test/bytecode_3.2/shift_left.pyc index d18c0ac3..9aedcb7c 100644 Binary files a/test/bytecode_3.2/shift_left.pyc and b/test/bytecode_3.2/shift_left.pyc differ diff --git a/test/bytecode_3.2/shift_right.pyc b/test/bytecode_3.2/shift_right.pyc index 8643247a..b194727f 100644 Binary files a/test/bytecode_3.2/shift_right.pyc and b/test/bytecode_3.2/shift_right.pyc differ diff --git a/test/bytecode_3.2/subscription.pyc b/test/bytecode_3.2/subscription.pyc index 6b974ad7..0de63554 100644 Binary files a/test/bytecode_3.2/subscription.pyc and b/test/bytecode_3.2/subscription.pyc differ diff --git a/test/bytecode_3.2/subtract.pyc b/test/bytecode_3.2/subtract.pyc index 439dc8dd..fe1cd0b6 100644 
Binary files a/test/bytecode_3.2/subtract.pyc and b/test/bytecode_3.2/subtract.pyc differ diff --git a/test/bytecode_3.2/while.pyc b/test/bytecode_3.2/while.pyc deleted file mode 100644 index b3f2fd7a..00000000 Binary files a/test/bytecode_3.2/while.pyc and /dev/null differ diff --git a/test/bytecode_3.2/xor.pyc b/test/bytecode_3.2/xor.pyc index e75f33bd..da0fd2be 100644 Binary files a/test/bytecode_3.2/xor.pyc and b/test/bytecode_3.2/xor.pyc differ diff --git a/test/bytecompile-tests b/test/bytecompile-tests index 223a3f6a..17d20ac6 100755 --- a/test/bytecompile-tests +++ b/test/bytecompile-tests @@ -70,7 +70,7 @@ for root, dirs, basenames in os.walk('simple_source'): simple_source.append(os.path.join(root, basename)[0:-3]) pass -tests['2.6'] = tests['2.7'] = tests['3.3'] = tests['3.4'] = simple_source +tests['2.6'] = tests['2.7'] = tests['3.2'] = tests['3.3'] = tests['3.4'] = simple_source total_tests = len(tests['2.7']) #tests['2.2'].sort(); print tests['2.2'] diff --git a/uncompyle6/opcodes/Makefile b/uncompyle6/opcodes/Makefile new file mode 100644 index 00000000..97ae30ad --- /dev/null +++ b/uncompyle6/opcodes/Makefile @@ -0,0 +1,7 @@ +# Whatever it is you want to do, it should be forwarded to the +# top-level directories +PHONY=check all +all: check + +%: + $(MAKE) -C ../.. $@ diff --git a/uncompyle6/opcodes/opcode_23.py b/uncompyle6/opcodes/opcode_23.py index 485ea13a..90779e99 100755 --- a/uncompyle6/opcodes/opcode_23.py +++ b/uncompyle6/opcodes/opcode_23.py @@ -1,6 +1,10 @@ """ -opcode module - potentially shared between dis and other modules which -operate on bytecodes (e.g. peephole optimizers). +CPython 2.3 bytecode opcodes + +This is used in scanner (bytecode disassembly) and parser (Python grammar). + +This is a superset of Python 2.3's opcode.py with some opcodes that simplify +parsing and semantic interpretation. 
""" cmp_op = ('<', '<=', '==', '!=', '>', '>=', 'in', 'not in', 'is', diff --git a/uncompyle6/opcodes/opcode_24.py b/uncompyle6/opcodes/opcode_24.py index 94ddf22d..73756531 100755 --- a/uncompyle6/opcodes/opcode_24.py +++ b/uncompyle6/opcodes/opcode_24.py @@ -1,6 +1,10 @@ """ -opcode module - potentially shared between dis and other modules which -operate on bytecodes (e.g. peephole optimizers). +CPython 2.4 bytecode opcodes + +This is used in scanner (bytecode disassembly) and parser (Python grammar). + +This is a superset of Python 2.4's opcode.py with some opcodes that simplify +parsing and semantic interpretation. """ __all__ = ["cmp_op", "hasconst", "hasname", "hasjrel", "hasjabs", diff --git a/uncompyle6/opcodes/opcode_25.py b/uncompyle6/opcodes/opcode_25.py index 8ce055b6..af71ac51 100755 --- a/uncompyle6/opcodes/opcode_25.py +++ b/uncompyle6/opcodes/opcode_25.py @@ -1,6 +1,10 @@ """ -opcode module - potentially shared between dis and other modules which -operate on bytecodes (e.g. peephole optimizers). +CPython 2.5 bytecode opcodes + +This is used in scanner (bytecode disassembly) and parser (Python grammar). + +This is a superset of Python 2.5's opcode.py with some opcodes that simplify +parsing and semantic interpretation. """ cmp_op = ('<', '<=', '==', '!=', '>', '>=', 'in', 'not in', 'is', diff --git a/uncompyle6/opcodes/opcode_26.py b/uncompyle6/opcodes/opcode_26.py index c085977b..949d84c3 100755 --- a/uncompyle6/opcodes/opcode_26.py +++ b/uncompyle6/opcodes/opcode_26.py @@ -1,6 +1,10 @@ """ -opcode module - potentially shared between dis and other modules which -operate on bytecodes (e.g. peephole optimizers). +CPython 2.6 bytecode opcodes + +This is used in scanner (bytecode disassembly) and parser (Python grammar). + +This is a superset of Python 2.6's opcode.py with some opcodes that simplify +parsing and semantic interpretation. 
""" cmp_op = ('<', '<=', '==', '!=', '>', '>=', 'in', 'not in', 'is', diff --git a/uncompyle6/opcodes/opcode_27.py b/uncompyle6/opcodes/opcode_27.py index 8f22f354..92e8559e 100755 --- a/uncompyle6/opcodes/opcode_27.py +++ b/uncompyle6/opcodes/opcode_27.py @@ -1,6 +1,10 @@ """ -opcode module - potentially shared between dis and other modules which -operate on bytecodes (e.g. peephole optimizers). +CPython 2.7 bytecode opcodes + +This is used in scanner (bytecode disassembly) and parser (Python grammar). + +This is a superset of Python 2.7's opcode.py with some opcodes that simplify +parsing and semantic interpretation. """ cmp_op = ('<', '<=', '==', '!=', '>', '>=', 'in', 'not in', 'is', @@ -198,3 +202,8 @@ def_op('MAP_ADD', 147) updateGlobal() del def_op, name_op, jrel_op, jabs_op + +from uncompyle6 import PYTHON_VERSION +if PYTHON_VERSION == 2.7: + import dis + assert all(item in opmap.items() for item in dis.opmap.items()) diff --git a/uncompyle6/opcodes/opcode_32.py b/uncompyle6/opcodes/opcode_32.py index 1d456e0c..046a08a8 100644 --- a/uncompyle6/opcodes/opcode_32.py +++ b/uncompyle6/opcodes/opcode_32.py @@ -1,7 +1,10 @@ - """ -opcode module - potentially shared between dis and other modules which -operate on bytecodes (e.g. peephole optimizers). +CPython 3.2 bytecode opcodes + +This is used in scanner (bytecode disassembly) and parser (Python grammar). + +This is a superset of Python 3.2's opcode.py with some opcodes that simplify +parsing and semantic interpretation. 
""" __all__ = ["cmp_op", "hasconst", "hasname", "hasjrel", "hasjabs", @@ -40,6 +43,16 @@ def jabs_op(name, op): def_op(name, op) hasjabs.append(op) +def updateGlobal(): + # JUMP_OPs are used in verification are set in the scanner + # and used in the parser grammar + globals().update({'PJIF': opmap['POP_JUMP_IF_FALSE']}) + globals().update({'PJIT': opmap['POP_JUMP_IF_TRUE']}) + globals().update({'JA': opmap['JUMP_ABSOLUTE']}) + globals().update({'JF': opmap['JUMP_FORWARD']}) + globals().update(dict([(k.replace('+', '_'), v) for (k, v) in opmap.items()])) + globals().update({'JUMP_OPs': map(lambda op: opname[op], hasjrel + hasjabs)}) + # Instruction opcodes for compiled code # Blank lines correspond to available opcodes @@ -191,4 +204,10 @@ def_op('MAP_ADD', 147) def_op('EXTENDED_ARG', 144) EXTENDED_ARG = 144 +updateGlobal() del def_op, name_op, jrel_op, jabs_op + +from uncompyle6 import PYTHON_VERSION +if PYTHON_VERSION == 3.2: + import dis + assert all(item in opmap.items() for item in dis.opmap.items()) diff --git a/uncompyle6/opcodes/opcode_33.py b/uncompyle6/opcodes/opcode_33.py index 879e815c..f4ed4871 100644 --- a/uncompyle6/opcodes/opcode_33.py +++ b/uncompyle6/opcodes/opcode_33.py @@ -1,8 +1,13 @@ +""" +CPython 3.3 bytecode opcodes +This is used in scanner (bytecode disassembly) and parser (Python grammar). + +This is a superset of Python 3.3's opcode.py with some opcodes that simplify +parsing and semantic interpretation. """ -opcode module - potentially shared between dis and other modules which -operate on bytecodes (e.g. peephole optimizers). 
-""" + +# Note: this should look exactly like Python 3.4's opcode.py __all__ = ["cmp_op", "hasconst", "hasname", "hasjrel", "hasjabs", "haslocal", "hascompare", "hasfree", "opname", "opmap", @@ -40,6 +45,16 @@ def jabs_op(name, op): def_op(name, op) hasjabs.append(op) +def updateGlobal(): + # JUMP_OPs are used in verification are set in the scanner + # and used in the parser grammar + globals().update({'PJIF': opmap['POP_JUMP_IF_FALSE']}) + globals().update({'PJIT': opmap['POP_JUMP_IF_TRUE']}) + globals().update({'JA': opmap['JUMP_ABSOLUTE']}) + globals().update({'JF': opmap['JUMP_FORWARD']}) + globals().update(dict([(k.replace('+', '_'), v) for (k, v) in opmap.items()])) + globals().update({'JUMP_OPs': map(lambda op: opname[op], hasjrel + hasjabs)}) + # Instruction opcodes for compiled code # Blank lines correspond to available opcodes @@ -95,6 +110,7 @@ def_op('LOAD_BUILD_CLASS', 71) # Python3 drops/changes: # def_op('PRINT_ITEM', 71) # def_op('PRINT_NEWLINE', 72) +def_op('YIELD_FROM', 72) # def_op('PRINT_ITEM_TO', 73) # def_op('PRINT_NEWLINE_TO', 74) @@ -186,4 +202,13 @@ def_op('MAP_ADD', 147) def_op('EXTENDED_ARG', 144) EXTENDED_ARG = 144 +updateGlobal() del def_op, name_op, jrel_op, jabs_op + +from uncompyle6 import PYTHON_VERSION +if PYTHON_VERSION == 3.3: + import dis + # for item in dis.opmap.items(): + # if item not in opmap.items(): + # print(item) + assert all(item in opmap.items() for item in dis.opmap.items()) diff --git a/uncompyle6/opcodes/opcode_34.py b/uncompyle6/opcodes/opcode_34.py index b54bcf4a..e69ffad5 100644 --- a/uncompyle6/opcodes/opcode_34.py +++ b/uncompyle6/opcodes/opcode_34.py @@ -1,9 +1,11 @@ """ -opcode module - potentially shared between dis and other modules which -operate on bytecodes (e.g. peephole optimizers). -""" +CPython 3.4 bytecode opcodes -# Note: this should look exactly like Python 3.4's opcode.py +This is used in scanner (bytecode disassembly) and parser (Python grammar). 
+ +This is a superset of Python 3.4's opcode.py with some opcodes that simplify +parsing and semantic interpretation. +""" __all__ = ["cmp_op", "hasconst", "hasname", "hasjrel", "hasjabs", "haslocal", "hascompare", "hasfree", "opname", "opmap", @@ -43,8 +45,8 @@ def jabs_op(name, op): hasjabs.append(op) def updateGlobal(): - # JUMP_OPs are used in verification and in the scanner in resolving forward/backward - # jumps + # JUMP_OPs are used in verification are set in the scanner + # and used in the parser grammar globals().update({'PJIF': opmap['POP_JUMP_IF_FALSE']}) globals().update({'PJIT': opmap['POP_JUMP_IF_TRUE']}) globals().update({'JA': opmap['JUMP_ABSOLUTE']}) @@ -215,3 +217,8 @@ EXTENDED_ARG = 144 updateGlobal() del def_op, name_op, jrel_op, jabs_op + +from uncompyle6 import PYTHON_VERSION +if PYTHON_VERSION == 3.4: + import dis + assert all(item in opmap.items() for item in dis.opmap.items()) diff --git a/uncompyle6/parser.py b/uncompyle6/parser.py index 12f30755..451d5c92 100644 --- a/uncompyle6/parser.py +++ b/uncompyle6/parser.py @@ -82,10 +82,12 @@ def get_python_parser(version, debug_parser): """ if version < 3.0: import uncompyle6.parsers.parse2 as parse2 - return parse2.Python2Parser(debug_parser) + p = parse2.Python2Parser(debug_parser) else: import uncompyle6.parsers.parse3 as parse3 - return parse3.Python3Parser(debug_parser) + p = parse3.Python3Parser(debug_parser) + p.version = version + return p def python_parser(version, co, out=sys.stdout, showasm=False, parser_debug=PARSER_DEFAULT_DEBUG): @@ -94,9 +96,9 @@ def python_parser(version, co, out=sys.stdout, showasm=False, from uncompyle6.scanner import get_scanner scanner = get_scanner(version) tokens, customize = scanner.disassemble(co) - # if showasm: - # for t in tokens: - # print(t) + if showasm: + for t in tokens: + print(t) p = get_python_parser(version, parser_debug) return parse(p, tokens, customize) diff --git a/uncompyle6/parsers/parse3.py b/uncompyle6/parsers/parse3.py index 
e14b69cf..129889d8 100644 --- a/uncompyle6/parsers/parse3.py +++ b/uncompyle6/parsers/parse3.py @@ -53,12 +53,11 @@ class Python3Parser(PythonParser): def p_list_comprehension(self, args): ''' - # Python3 adds LOAD_LISTCOMP and does list comprehension like + # Python3 scanner adds LOAD_LISTCOMP. Python3 does list comprehension like # other comprehensions (set, dictionary). + # listcomp is a custom rule expr ::= listcomp - listcomp ::= LOAD_LISTCOMP LOAD_CONST MAKE_FUNCTION_0 expr GET_ITER CALL_FUNCTION_1 - expr ::= list_compr list_compr ::= BUILD_LIST_0 list_iter @@ -673,6 +672,14 @@ class Python3Parser(PythonParser): ''' def custom_buildclass_rule(self, opname, i, token, tokens, customize): + """ + Python >= 3.3: + buildclass ::= LOAD_BUILD_CLASS mkfunc LOAD_CONST LOAD_CLASSNAME CALL_FUNCTION_3 + Python < 3.3 + buildclass ::= LOAD_BUILD_CLASS LOAD_CONST MAKE_FUNCTION_0 LOAD_CONST + CALL_FUNCTION_n + + """ # look for next MAKE_FUNCTION for i in range(i+1, len(tokens)): @@ -680,11 +687,15 @@ class Python3Parser(PythonParser): break pass assert i < len(tokens) - assert tokens[i+1].type == 'LOAD_CONST' + if self.version >= 3.3: + assert tokens[i+1].type == 'LOAD_CONST' + load_check = 'LOAD_NAME' + else: + load_check = 'LOAD_CONST' # find load names have_loadname = False for i in range(i+1, len(tokens)): - if tokens[i].type == 'LOAD_NAME': + if tokens[i].type == load_check: tokens[i].type = 'LOAD_CLASSNAME' have_loadname = True break @@ -706,9 +717,14 @@ class Python3Parser(PythonParser): j = 0 load_names = '' # customize CALL_FUNCTION - call_function = 'CALL_FUNCTION_%d' % (j + 2) - rule = ("buildclass ::= LOAD_BUILD_CLASS mkfunc LOAD_CONST %s%s" % - (load_names, call_function)) + if self.version >= 3.3: + call_function = 'CALL_FUNCTION_%d' % (j + 2) + rule = ("buildclass ::= LOAD_BUILD_CLASS mkfunc LOAD_CONST %s%s" % + (load_names, call_function)) + else: + call_function = 'CALL_FUNCTION_%d' % (j + 1) + rule = ("buildclass ::= LOAD_BUILD_CLASS mkfunc %s%s" % + 
(load_names, call_function)) self.add_unique_rule(rule, opname, token.attr, customize) return @@ -717,6 +733,15 @@ class Python3Parser(PythonParser): Special handling for opcodes that take a variable number of arguments -- we add a new rule for each: + Python 3.4: + listcomp ::= LOAD_LISTCOMP LOAD_CONST MAKE_FUNCTION_0 expr + GET_ITER CALL_FUNCTION_1 + Python < 3.4 + listcomp ::= LOAD_LISTCOMP MAKE_FUNCTION_0 expr + GET_ITER CALL_FUNCTION_1 + + buildclass (see load_build_class) + build_list ::= {expr}^n BUILD_LIST_n build_list ::= {expr}^n BUILD_TUPLE_n unpack_list ::= UNPACK_LIST {expr}^n @@ -750,6 +775,14 @@ class Python3Parser(PythonParser): + ('kwarg ' * args_kw) + 'expr ' * nak + token.type) self.add_unique_rule(rule, token.type, args_pos, customize) + elif opname == 'LOAD_LISTCOMP': + if self.version >= 3.4: + rule = ("listcomp ::= LOAD_LISTCOMP LOAD_CONST MAKE_FUNCTION_0 expr " + "GET_ITER CALL_FUNCTION_1") + else: + rule = ("listcomp ::= LOAD_LISTCOMP MAKE_FUNCTION_0 expr " + "GET_ITER CALL_FUNCTION_1") + self.add_unique_rule(rule, opname, token.attr, customize) elif opname == 'LOAD_BUILD_CLASS': self.custom_buildclass_rule(opname, i, token, tokens, customize) elif opname_base in ('BUILD_LIST', 'BUILD_TUPLE', 'BUILD_SET'): @@ -763,7 +796,10 @@ class Python3Parser(PythonParser): elif opname_base == ('MAKE_FUNCTION'): self.addRule('mklambda ::= %s LOAD_LAMBDA %s' % ('expr ' * token.attr, opname), nop_func) - rule = 'mkfunc ::= %s LOAD_CONST LOAD_CONST %s' % ('expr ' * token.attr, opname) + if self.version >= 3.3: + rule = 'mkfunc ::= %s LOAD_CONST LOAD_CONST %s' % ('expr ' * token.attr, opname) + else: + rule = 'mkfunc ::= %s LOAD_CONST %s' % ('expr ' * token.attr, opname) self.add_unique_rule(rule, opname, token.attr, customize) pass return diff --git a/uncompyle6/scanner.py b/uncompyle6/scanner.py index 69535244..52047074 100755 --- a/uncompyle6/scanner.py +++ b/uncompyle6/scanner.py @@ -32,6 +32,14 @@ else: from uncompyle6.opcodes import opcode_25, 
opcode_26, opcode_27, opcode_32, opcode_33, opcode_34 +class GenericPythonCode: + ''' + Class for representing code-like objects across different versions of + Python. + ''' + def __init__(self): + return + class Code: ''' Class for representing code-objects. @@ -46,9 +54,9 @@ class Code: self._tokens, self._customize = scanner.disassemble(co, classname) class Scanner(object): - opc = None # opcode module def __init__(self, version): + # FIXME: DRY if version == 2.7: self.opc = opcode_27 elif version == 2.6: @@ -61,14 +69,12 @@ class Scanner(object): self.opc = opcode_33 elif version == 3.4: self.opc = opcode_34 + else: + raise TypeError("%i is not a Python version I know about") # FIXME: This weird Python2 behavior is not Python3 self.resetTokenClass() - def setShowAsm(self, showasm, out=None): - self.showasm = showasm - self.out = out - def setTokenClass(self, tokenClass): # assert isinstance(tokenClass, types.ClassType) self.Token = tokenClass diff --git a/uncompyle6/scanners/scanner25.py b/uncompyle6/scanners/scanner25.py index 82711b48..2e0a0efd 100755 --- a/uncompyle6/scanners/scanner25.py +++ b/uncompyle6/scanners/scanner25.py @@ -12,7 +12,6 @@ Python 3 and other versions of Python. Also, we save token information for later use in deparsing. """ -import inspect from collections import namedtuple from array import array @@ -151,7 +150,11 @@ class Scanner25(scan.Scanner): continue if op in hasconst: const = co.co_consts[oparg] - if inspect.iscode(const): + # We can't use inspect.iscode() because we may be + # using a different version of Python than the + # one that this was byte-compiled on. So the code + # types may mismatch. 
+ if hasattr(const, 'co_name'): oparg = const if const.co_name == '': assert op_name == 'LOAD_CONST' @@ -912,6 +915,7 @@ class Scanner25(scan.Scanner): return targets if __name__ == "__main__": + import inspect co = inspect.currentframe().f_code tokens, customize = Scanner25().disassemble(co) for t in tokens: diff --git a/uncompyle6/scanners/scanner26.py b/uncompyle6/scanners/scanner26.py index 65594371..fecf951e 100755 --- a/uncompyle6/scanners/scanner26.py +++ b/uncompyle6/scanners/scanner26.py @@ -11,7 +11,6 @@ other versions of Python. Also, we save token information for later use in deparsing. """ -import inspect from collections import namedtuple from array import array @@ -145,7 +144,11 @@ class Scanner26(scan.Scanner): continue if op in hasconst: const = co.co_consts[oparg] - if inspect.iscode(const): + # We can't use inspect.iscode() because we may be + # using a different version of Python than the + # one that this was byte-compiled on. So the code + # types may mismatch. + if hasattr(const, 'co_name'): oparg = const if const.co_name == '': assert op_name == 'LOAD_CONST' @@ -901,6 +904,7 @@ class Scanner26(scan.Scanner): return targets if __name__ == "__main__": + import inspect co = inspect.currentframe().f_code tokens, customize = Scanner26().disassemble(co) for t in tokens: diff --git a/uncompyle6/scanners/scanner27.py b/uncompyle6/scanners/scanner27.py index 58df9ef3..42584b0a 100755 --- a/uncompyle6/scanners/scanner27.py +++ b/uncompyle6/scanners/scanner27.py @@ -138,7 +138,11 @@ class Scanner27(scan.Scanner): continue if op in hasconst: const = co.co_consts[oparg] - if inspect.iscode(const): + # We can't use inspect.iscode() because we may be + # using a different version of Python than the + # one that this was byte-compiled on. So the code + # types may mismatch. 
+ if hasattr(const, 'co_name'): oparg = const if const.co_name == '': assert op_name == 'LOAD_CONST' diff --git a/uncompyle6/scanners/scanner3.py b/uncompyle6/scanners/scanner3.py new file mode 100644 index 00000000..89ea3533 --- /dev/null +++ b/uncompyle6/scanners/scanner3.py @@ -0,0 +1,607 @@ +# Copyright (c) 2015 by Rocky Bernstein +""" +Python 3 Generic ytecode scanner/deparser + +This overlaps various Python3's dis module, but it can be run from +Python 2 and other versions of Python. Also, we save token information +for later use in deparsing. +""" + +from __future__ import print_function + +import dis, re +from collections import namedtuple +from array import array + +from uncompyle6.scanner import Token +from uncompyle6 import PYTHON_VERSION, PYTHON3 + + +# Get all the opcodes into globals +globals().update(dis.opmap) +from uncompyle6.opcodes.opcode_33 import * +import uncompyle6.scanner as scan + + +class Scanner3(scan.Scanner): + + def __init__(self): + scan.Scanner.__init__(self, PYTHON_VERSION) + + def disassemble_generic(self, co, classname=None): + """ + Convert code object into a sequence of tokens. + + The below is based on (an older version?) of Python dis.disassemble_bytes(). + """ + # Container for tokens + tokens = [] + customize = {} + self.code = code = array('B', co.co_code) + codelen = len(code) + self.build_lines_data(co) + self.build_prev_op() + + # self.lines contains (block,addrLastInstr) + if classname: + classname = '_' + classname.lstrip('_') + '__' + + def unmangle(name): + if name.startswith(classname) and name[-2:] != '__': + return name[len(classname) - 2:] + return name + + free = [ unmangle(name) for name in (co.co_cellvars + co.co_freevars) ] + names = [ unmangle(name) for name in co.co_names ] + varnames = [ unmangle(name) for name in co.co_varnames ] + else: + free = co.co_cellvars + co.co_freevars + names = co.co_names + varnames = co.co_varnames + pass + + # Scan for assertions. 
Later we will + # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT' for those + # assertions + + self.load_asserts = set() + for i in self.op_range(0, codelen): + if self.code[i] == POP_JUMP_IF_TRUE and self.code[i+3] == LOAD_GLOBAL: + if names[self.get_argument(i+3)] == 'AssertionError': + self.load_asserts.add(i+3) + + # Get jump targets + # Format: {target offset: [jump offsets]} + jump_targets = self.find_jump_targets() + + # contains (code, [addrRefToCode]) + last_stmt = self.next_stmt[0] + i = self.next_stmt[last_stmt] + replace = {} + + imports = self.all_instr(0, codelen, (IMPORT_NAME, IMPORT_FROM, IMPORT_STAR)) + if len(imports) > 1: + last_import = imports[0] + for i in imports[1:]: + if self.lines[last_import].next > i: + if self.code[last_import] == IMPORT_NAME == self.code[i]: + replace[i] = 'IMPORT_NAME_CONT' + last_import = i + + # Initialize extended arg at 0. When extended arg op is encountered, + # variable preserved for next cycle and added as arg for next op + extended_arg = 0 + + for offset in self.op_range(0, codelen): + # Add jump target tokens + if offset in jump_targets: + jump_idx = 0 + for jump_offset in jump_targets[offset]: + tokens.append(Token('COME_FROM', None, repr(jump_offset), + offset='{}_{}'.format(offset, jump_idx))) + jump_idx += 1 + pass + pass + + op = code[offset] + op_name = opname[op] + + oparg = None; pattr = None + + if op >= HAVE_ARGUMENT: + oparg = self.get_argument(offset) + extended_arg + extended_arg = 0 + if op == EXTENDED_ARG: + extended_arg = oparg * scan.L65536 + continue + if op in hasconst: + const = co.co_consts[oparg] + if not PYTHON3 and isinstance(const, str): + m = re.search('^', const) + if m: + const = scan.GenericPythonCode() + const.co_name = m.group(1) + const.co_filenaame = m.group(3) + const.co_firstlineno = m.group(4) + pass + # We can't use inspect.iscode() because we may be + # using a different version of Python than the + # one that this was byte-compiled on. So the code + # types may mismatch. 
+ if hasattr(const, 'co_name'): + oparg = const + if const.co_name == '': + assert op_name == 'LOAD_CONST' + op_name = 'LOAD_LAMBDA' + elif const.co_name == '': + op_name = 'LOAD_GENEXPR' + elif const.co_name == '': + op_name = 'LOAD_DICTCOMP' + elif const.co_name == '': + op_name = 'LOAD_SETCOMP' + elif const.co_name == '': + op_name = 'LOAD_LISTCOMP' + # verify() uses 'pattr' for comparison, since 'attr' + # now holds Code(const) and thus can not be used + # for comparison (todo: think about changing this) + # pattr = 'code_object @ 0x%x %s->%s' %\ + # (id(const), const.co_filename, const.co_name) + pattr = '' + else: + pattr = const + elif op in hasname: + pattr = names[oparg] + elif op in hasjrel: + pattr = repr(offset + 3 + oparg) + elif op in hasjabs: + pattr = repr(oparg) + elif op in haslocal: + pattr = varnames[oparg] + elif op in hascompare: + pattr = cmp_op[oparg] + elif op in hasfree: + pattr = free[oparg] + + if op in (BUILD_LIST, BUILD_TUPLE, BUILD_SET, BUILD_SLICE, + UNPACK_SEQUENCE, + MAKE_FUNCTION, CALL_FUNCTION, MAKE_CLOSURE, + CALL_FUNCTION_VAR, CALL_FUNCTION_KW, + CALL_FUNCTION_VAR_KW, RAISE_VARARGS + ): + # As of Python 2.5, values loaded via LOAD_CLOSURE are packed into + # a tuple before calling MAKE_CLOSURE. 
+ if (op == BUILD_TUPLE and + self.code[self.prev_op[offset]] == LOAD_CLOSURE): + continue + else: + # CALL_FUNCTION OP renaming is done as a custom rule in parse3 + if op_name not in ('CALL_FUNCTION', 'CALL_FUNCTION_VAR', + 'CALL_FUNCTION_VAR_KW', 'CALL_FUNCTION_KW'): + op_name = '%s_%d' % (op_name, oparg) + if op != BUILD_SLICE: + customize[op_name] = oparg + elif op == JUMP_ABSOLUTE: + target = self.get_target(offset) + if target < offset: + if (offset in self.stmts + and self.code[offset+3] not in (END_FINALLY, POP_BLOCK) + and offset not in self.not_continue): + op_name = 'CONTINUE' + else: + op_name = 'JUMP_BACK' + + elif op == LOAD_GLOBAL: + if offset in self.load_asserts: + op_name = 'LOAD_ASSERT' + elif op == RETURN_VALUE: + if offset in self.return_end_ifs: + op_name = 'RETURN_END_IF' + + if offset in self.linestarts: + linestart = self.linestarts[offset] + else: + linestart = None + + if offset not in replace: + tokens.append(Token(op_name, oparg, pattr, offset, linestart)) + else: + tokens.append(Token(replace[offset], oparg, pattr, offset, linestart)) + pass + return tokens, customize + + def build_lines_data(self, code_obj): + """ + Generate various line-related helper data. + """ + # Offset: lineno pairs, only for offsets which start line. 
+ # Locally we use list for more convenient iteration using indices + linestarts = list(dis.findlinestarts(code_obj)) + self.linestarts = dict(linestarts) + # Plain set with offsets of first ops on line + self.linestart_offsets = {a for (a, _) in linestarts} + # 'List-map' which shows line number of current op and offset of + # first op on following line, given offset of op as index + self.lines = lines = [] + LineTuple = namedtuple('LineTuple', ['l_no', 'next']) + # Iterate through available linestarts, and fill + # the data for all code offsets encountered until + # last linestart offset + _, prev_line_no = linestarts[0] + offset = 0 + for start_offset, line_no in linestarts[1:]: + while offset < start_offset: + lines.append(LineTuple(prev_line_no, start_offset)) + offset += 1 + prev_line_no = line_no + # Fill remaining offsets with reference to last line number + # and code length as start offset of following non-existing line + codelen = len(self.code) + while offset < codelen: + lines.append(LineTuple(prev_line_no, codelen)) + offset += 1 + + def build_prev_op(self): + """ + Compose 'list-map' which allows to jump to previous + op, given offset of current op as index. + """ + code = self.code + codelen = len(code) + self.prev_op = [0] + for offset in self.op_range(0, codelen): + op = code[offset] + for _ in range(self.op_size(op)): + self.prev_op.append(offset) + + def op_size(self, op): + """ + Return size of operator with its arguments + for given opcode . + """ + if op < dis.HAVE_ARGUMENT: + return 1 + else: + return 3 + + def find_jump_targets(self): + """ + Detect all offsets in a byte code which are jump targets. + + Return the list of offsets. + + This procedure is modelled after dis.findlables(), but here + for each target the number of jumps is counted. 
+ """ + code = self.code + codelen = len(code) + self.structs = [{'type': 'root', + 'start': 0, + 'end': codelen-1}] + + # All loop entry points + # self.loops = [] + # Map fixed jumps to their real destination + self.fixed_jumps = {} + self.ignore_if = set() + self.build_statement_indices() + # Containers filled by detect_structure() + self.not_continue = set() + self.return_end_ifs = set() + + targets = {} + for offset in self.op_range(0, codelen): + op = code[offset] + + # Determine structures and fix jumps for 2.3+ + self.detect_structure(offset) + + if op >= dis.HAVE_ARGUMENT: + label = self.fixed_jumps.get(offset) + oparg = code[offset+1] + code[offset+2] * 256 + + if label is None: + if op in dis.hasjrel and op != FOR_ITER: + label = offset + 3 + oparg + elif op in dis.hasjabs: + if op in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP): + if oparg > offset: + label = oparg + + if label is not None and label != -1: + targets[label] = targets.get(label, []) + [offset] + elif op == END_FINALLY and offset in self.fixed_jumps: + label = self.fixed_jumps[offset] + targets[label] = targets.get(label, []) + [offset] + return targets + + def build_statement_indices(self): + code = self.code + start = 0 + end = codelen = len(code) + + statement_opcodes = { + SETUP_LOOP, BREAK_LOOP, CONTINUE_LOOP, + SETUP_FINALLY, END_FINALLY, SETUP_EXCEPT, SETUP_WITH, + POP_BLOCK, STORE_FAST, DELETE_FAST, STORE_DEREF, + STORE_GLOBAL, DELETE_GLOBAL, STORE_NAME, DELETE_NAME, + STORE_ATTR, DELETE_ATTR, STORE_SUBSCR, DELETE_SUBSCR, + RETURN_VALUE, RAISE_VARARGS, POP_TOP, PRINT_EXPR, + JUMP_ABSOLUTE + } + + statement_opcode_sequences = [(POP_JUMP_IF_FALSE, JUMP_FORWARD), (POP_JUMP_IF_FALSE, JUMP_ABSOLUTE), + (POP_JUMP_IF_TRUE, JUMP_FORWARD), (POP_JUMP_IF_TRUE, JUMP_ABSOLUTE)] + + designator_ops = { + STORE_FAST, STORE_NAME, STORE_GLOBAL, STORE_DEREF, STORE_ATTR, + STORE_SUBSCR, UNPACK_SEQUENCE, JUMP_ABSOLUTE + } + + # Compose preliminary list of indices with statements, + # using plain 
statement opcodes + prelim = self.all_instr(start, end, statement_opcodes) + + # Initialize final container with statements with + # preliminnary data + stmts = self.stmts = set(prelim) + + # Same for opcode sequences + pass_stmts = set() + for sequence in statement_opcode_sequences: + for i in self.op_range(start, end-(len(sequence)+1)): + match = True + for elem in sequence: + if elem != code[i]: + match = False + break + i += self.op_size(code[i]) + + if match is True: + i = self.prev_op[i] + stmts.add(i) + pass_stmts.add(i) + + # Initialize statement list with the full data we've gathered so far + if pass_stmts: + stmt_offset_list = list(stmts) + stmt_offset_list.sort() + else: + stmt_offset_list = prelim + # 'List-map' which contains offset of start of + # next statement, when op offset is passed as index + self.next_stmt = slist = [] + last_stmt_offset = -1 + i = 0 + # Go through all statement offsets + for stmt_offset in stmt_offset_list: + # Process absolute jumps, but do not remove 'pass' statements + # from the set + if code[stmt_offset] == JUMP_ABSOLUTE and stmt_offset not in pass_stmts: + # If absolute jump occurs in forward direction or it takes off from the + # same line as previous statement, this is not a statement + target = self.get_target(stmt_offset) + if target > stmt_offset or self.lines[last_stmt_offset].l_no == self.lines[stmt_offset].l_no: + stmts.remove(stmt_offset) + continue + # Rewing ops till we encounter non-JA one + j = self.prev_op[stmt_offset] + while code[j] == JUMP_ABSOLUTE: + j = self.prev_op[j] + # If we got here, then it's list comprehension which + # is not a statement too + if code[j] == LIST_APPEND: + stmts.remove(stmt_offset) + continue + # Exclude ROT_TWO + POP_TOP + elif code[stmt_offset] == POP_TOP and code[self.prev_op[stmt_offset]] == ROT_TWO: + stmts.remove(stmt_offset) + continue + # Exclude FOR_ITER + designators + elif code[stmt_offset] in designator_ops: + j = self.prev_op[stmt_offset] + while code[j] in 
designator_ops: + j = self.prev_op[j] + if code[j] == FOR_ITER: + stmts.remove(stmt_offset) + continue + # Add to list another list with offset of current statement, + # equal to length of previous statement + slist += [stmt_offset] * (stmt_offset-i) + last_stmt_offset = stmt_offset + i = stmt_offset + # Finish filling the list for last statement + slist += [codelen] * (codelen-len(slist)) + + def get_target(self, offset): + """ + Get target offset for op located at given . + """ + op = self.code[offset] + target = self.code[offset+1] + self.code[offset+2] * 256 + if op in dis.hasjrel: + target += offset + 3 + return target + + def detect_structure(self, offset): + """ + Detect structures and their boundaries to fix optimizied jumps + in python2.3+ + """ + code = self.code + op = code[offset] + # Detect parent structure + parent = self.structs[0] + start = parent['start'] + end = parent['end'] + + # Pick inner-most parent for our offset + for struct in self.structs: + curent_start = struct['start'] + curent_end = struct['end'] + if (curent_start <= offset < curent_end) and (curent_start >= start and curent_end <= end): + start = curent_start + end = curent_end + parent = struct + + if op in (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE): + start = offset + self.op_size(op) + target = self.get_target(offset) + rtarget = self.restrict_to_parent(target, parent) + prev_op = self.prev_op + + # Do not let jump to go out of parent struct bounds + if target != rtarget and parent['type'] == 'and/or': + self.fixed_jumps[offset] = rtarget + return + + # Does this jump to right after another cond jump? 
+ # If so, it's part of a larger conditional + if (code[prev_op[target]] in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP, + POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE)) and (target > offset): + self.fixed_jumps[offset] = prev_op[target] + self.structs.append({'type': 'and/or', + 'start': start, + 'end': prev_op[target]}) + return + # Is it an and inside if block + if op == POP_JUMP_IF_FALSE: + # Search for other POP_JUMP_IF_FALSE targetting the same op, + # in current statement, starting from current offset, and filter + # everything inside inner 'or' jumps and midline ifs + match = self.rem_or(start, self.next_stmt[offset], POP_JUMP_IF_FALSE, target) + match = self.remove_mid_line_ifs(match) + # If we still have any offsets in set, start working on it + if match: + if (code[prev_op[rtarget]] in (JUMP_FORWARD, JUMP_ABSOLUTE) and prev_op[rtarget] not in self.stmts and + self.restrict_to_parent(self.get_target(prev_op[rtarget]), parent) == rtarget): + if (code[prev_op[prev_op[rtarget]]] == JUMP_ABSOLUTE and self.remove_mid_line_ifs([offset]) and + target == self.get_target(prev_op[prev_op[rtarget]]) and + (prev_op[prev_op[rtarget]] not in self.stmts or self.get_target(prev_op[prev_op[rtarget]]) > prev_op[prev_op[rtarget]]) and + 1 == len(self.remove_mid_line_ifs(self.rem_or(start, prev_op[prev_op[rtarget]], (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE), target)))): + pass + elif (code[prev_op[prev_op[rtarget]]] == RETURN_VALUE and self.remove_mid_line_ifs([offset]) and + 1 == (len(set(self.remove_mid_line_ifs(self.rem_or(start, prev_op[prev_op[rtarget]], + (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE), target))) | + set(self.remove_mid_line_ifs(self.rem_or(start, prev_op[prev_op[rtarget]], + (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE, JUMP_ABSOLUTE), + prev_op[rtarget], True)))))): + pass + else: + fix = None + jump_ifs = self.all_instr(start, self.next_stmt[offset], POP_JUMP_IF_FALSE) + last_jump_good = True + for j in jump_ifs: + if target == self.get_target(j): + if self.lines[j].next == j + 3 
and last_jump_good: + fix = j + break + else: + last_jump_good = False + self.fixed_jumps[offset] = fix or match[-1] + return + else: + self.fixed_jumps[offset] = match[-1] + return + # op == POP_JUMP_IF_TRUE + else: + next = self.next_stmt[offset] + if prev_op[next] == offset: + pass + elif code[next] in (JUMP_FORWARD, JUMP_ABSOLUTE) and target == self.get_target(next): + if code[prev_op[next]] == POP_JUMP_IF_FALSE: + if code[next] == JUMP_FORWARD or target != rtarget or code[prev_op[prev_op[rtarget]]] not in (JUMP_ABSOLUTE, RETURN_VALUE): + self.fixed_jumps[offset] = prev_op[next] + return + elif (code[next] == JUMP_ABSOLUTE and code[target] in (JUMP_ABSOLUTE, JUMP_FORWARD) and + self.get_target(target) == self.get_target(next)): + self.fixed_jumps[offset] = prev_op[next] + return + + # Don't add a struct for a while test, it's already taken care of + if offset in self.ignore_if: + return + + if (code[prev_op[rtarget]] == JUMP_ABSOLUTE and prev_op[rtarget] in self.stmts and + prev_op[rtarget] != offset and prev_op[prev_op[rtarget]] != offset and + not (code[rtarget] == JUMP_ABSOLUTE and code[rtarget+3] == POP_BLOCK and code[prev_op[prev_op[rtarget]]] != JUMP_ABSOLUTE)): + rtarget = prev_op[rtarget] + + # Does the if jump just beyond a jump op, then this is probably an if statement + if code[prev_op[rtarget]] in (JUMP_ABSOLUTE, JUMP_FORWARD): + if_end = self.get_target(prev_op[rtarget]) + + # Is this a loop not an if? 
+ if (if_end < prev_op[rtarget]) and (code[prev_op[if_end]] == SETUP_LOOP): + if(if_end > start): + return + + end = self.restrict_to_parent(if_end, parent) + + self.structs.append({'type': 'if-then', + 'start': start, + 'end': prev_op[rtarget]}) + self.not_continue.add(prev_op[rtarget]) + + if rtarget < end: + self.structs.append({'type': 'if-else', + 'start': rtarget, + 'end': end}) + elif code[prev_op[rtarget]] == RETURN_VALUE: + self.structs.append({'type': 'if-then', + 'start': start, + 'end': rtarget}) + self.return_end_ifs.add(prev_op[rtarget]) + + elif op in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP): + target = self.get_target(offset) + if target > offset: + unop_target = self.last_instr(offset, target, JUMP_FORWARD, target) + if unop_target and code[unop_target+3] != ROT_TWO: + self.fixed_jumps[offset] = unop_target + else: + self.fixed_jumps[offset] = self.restrict_to_parent(target, parent) + + def rem_or(self, start, end, instr, target=None, include_beyond_target=False): + """ + Find offsets of all requested between and , + optionally ing specified offset, and return list found + offsets which are not within any POP_JUMP_IF_TRUE jumps. + """ + # Find all offsets of requested instructions + instr_offsets = self.all_instr(start, end, instr, target, include_beyond_target) + # Get all POP_JUMP_IF_TRUE (or) offsets + pjit_offsets = self.all_instr(start, end, POP_JUMP_IF_TRUE) + filtered = [] + for pjit_offset in pjit_offsets: + pjit_tgt = self.get_target(pjit_offset) - 3 + for instr_offset in instr_offsets: + if instr_offset <= pjit_offset or instr_offset >= pjit_tgt: + filtered.append(instr_offset) + instr_offsets = filtered + filtered = [] + return instr_offsets + + def remove_mid_line_ifs(self, ifs): + """ + Go through passed offsets, filtering ifs + located somewhere mid-line. 
+ """ + filtered = [] + for if_ in ifs: + # For each offset, if line number of current and next op + # is the same + if self.lines[if_].l_no == self.lines[if_+3].l_no: + # Check if last op on line is PJIT or PJIF, and if it is - skip it + if self.code[self.prev_op[self.lines[if_].next]] in (POP_JUMP_IF_TRUE, POP_JUMP_IF_FALSE): + continue + filtered.append(if_) + return filtered + +if __name__ == "__main__": + import inspect + co = inspect.currentframe().f_code + tokens, customize = Scanner3().disassemble_generic(co) + for t in tokens: + print(t) + pass diff --git a/uncompyle6/scanners/scanner32.py b/uncompyle6/scanners/scanner32.py index 8660c61e..f197015e 100644 --- a/uncompyle6/scanners/scanner32.py +++ b/uncompyle6/scanners/scanner32.py @@ -9,19 +9,21 @@ for later use in deparsing. from __future__ import print_function -import uncompyle6.scanners.scanner33 as scan33 -import uncompyle6.scanner as scan +import uncompyle6.scanners.scanner3 as scan3 -class Scanner32(scan.Scanner): - def __init__(self): - scan.Scanner.__init__(self, 3.2) # check +import uncompyle6.opcodes.opcode_34 +# verify uses JUMP_OPs from here +JUMP_OPs = uncompyle6.opcodes.opcode_34.JUMP_OPs + +class Scanner32(scan3.Scanner3): def disassemble(self, co, classname=None): - return scan33.Scanner33().disassemble(co, classname) + return self.disassemble_generic(co, classname) if __name__ == "__main__": + import inspect co = inspect.currentframe().f_code - tokens, customize = Scanner33().disassemble(co) + tokens, customize = Scanner32().disassemble(co) for t in tokens: print(t) pass diff --git a/uncompyle6/scanners/scanner33.py b/uncompyle6/scanners/scanner33.py index dcadc364..0c1f3de6 100644 --- a/uncompyle6/scanners/scanner33.py +++ b/uncompyle6/scanners/scanner33.py @@ -1,6 +1,6 @@ # Copyright (c) 2015 by Rocky Bernstein """ -Python 3.3 bytecode scanner/deparser +Python 3 bytecode scanner/deparser This overlaps Python's 3.3's dis module, but it can be run from Python 2 and other versions of 
Python. Also, we save token information @@ -9,598 +9,19 @@ for later use in deparsing. from __future__ import print_function -import dis, inspect, marshal -from collections import namedtuple -from array import array +import uncompyle6.scanners.scanner3 as scan3 -from uncompyle6.scanner import Token, L65536 +import uncompyle6.opcodes.opcode_33 +# verify uses JUMP_OPs from here +JUMP_OPs = uncompyle6.opcodes.opcode_33.JUMP_OPs - -# Get all the opcodes into globals -globals().update(dis.opmap) -from uncompyle6.opcodes.opcode_27 import * -import uncompyle6.scanner as scan - - -class Scanner33(scan.Scanner): - def __init__(self): - scan.Scanner.__init__(self, 3.2) # check - - def run(self, bytecode): - code_object = marshal.loads(bytecode) - tokens = self.tokenize(code_object) - return tokens +class Scanner33(scan3.Scanner3): def disassemble(self, co, classname=None): - """ - Convert code object into a sequence of tokens. - - The below is based on (an older version?) of Python dis.disassemble_bytes(). - """ - # Container for tokens - tokens = [] - customize = {} - self.code = code = array('B', co.co_code) - codelen = len(code) - self.build_lines_data(co) - self.build_prev_op() - - # self.lines contains (block,addrLastInstr) - if classname: - classname = '_' + classname.lstrip('_') + '__' - - def unmangle(name): - if name.startswith(classname) and name[-2:] != '__': - return name[len(classname) - 2:] - return name - - free = [ unmangle(name) for name in (co.co_cellvars + co.co_freevars) ] - names = [ unmangle(name) for name in co.co_names ] - varnames = [ unmangle(name) for name in co.co_varnames ] - else: - free = co.co_cellvars + co.co_freevars - names = co.co_names - varnames = co.co_varnames - pass - - # Scan for assertions. 
Later we will - # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT' for those - # assertions - - self.load_asserts = set() - for i in self.op_range(0, codelen): - if self.code[i] == POP_JUMP_IF_TRUE and self.code[i+3] == LOAD_GLOBAL: - if names[self.get_argument(i+3)] == 'AssertionError': - self.load_asserts.add(i+3) - - # Get jump targets - # Format: {target offset: [jump offsets]} - jump_targets = self.find_jump_targets() - - # contains (code, [addrRefToCode]) - last_stmt = self.next_stmt[0] - i = self.next_stmt[last_stmt] - replace = {} - while i < codelen-1: - if self.lines[last_stmt].next > i: - if self.code[last_stmt] == PRINT_ITEM: - if self.code[i] == PRINT_ITEM: - replace[i] = 'PRINT_ITEM_CONT' - elif self.code[i] == PRINT_NEWLINE: - replace[i] = 'PRINT_NEWLINE_CONT' - last_stmt = i - i = self.next_stmt[i] - - imports = self.all_instr(0, codelen, (IMPORT_NAME, IMPORT_FROM, IMPORT_STAR)) - if len(imports) > 1: - last_import = imports[0] - for i in imports[1:]: - if self.lines[last_import].next > i: - if self.code[last_import] == IMPORT_NAME == self.code[i]: - replace[i] = 'IMPORT_NAME_CONT' - last_import = i - - # Initialize extended arg at 0. 
When extended arg op is encountered, - # variable preserved for next cycle and added as arg for next op - extended_arg = 0 - - for offset in self.op_range(0, codelen): - # Add jump target tokens - if offset in jump_targets: - jump_idx = 0 - for jump_offset in jump_targets[offset]: - tokens.append(Token('COME_FROM', None, repr(jump_offset), - offset='{}_{}'.format(offset, jump_idx))) - jump_idx += 1 - pass - pass - - op = code[offset] - op_name = opname[op] - - oparg = None; pattr = None - - if op >= HAVE_ARGUMENT: - oparg = self.get_argument(offset) + extended_arg - extended_arg = 0 - if op == EXTENDED_ARG: - extended_arg = oparg * scan.L65536 - continue - if op in hasconst: - const = co.co_consts[oparg] - if inspect.iscode(const): - oparg = const - if const.co_name == '': - assert op_name == 'LOAD_CONST' - op_name = 'LOAD_LAMBDA' - elif const.co_name == '': - op_name = 'LOAD_GENEXPR' - elif const.co_name == '': - op_name = 'LOAD_DICTCOMP' - elif const.co_name == '': - op_name = 'LOAD_SETCOMP' - # verify() uses 'pattr' for comparison, since 'attr' - # now holds Code(const) and thus can not be used - # for comparison (todo: think about changing this) - # pattr = 'code_object @ 0x%x %s->%s' %\ - # (id(const), const.co_filename, const.co_name) - pattr = '' - else: - pattr = const - elif op in hasname: - pattr = names[oparg] - elif op in hasjrel: - pattr = repr(offset + 3 + oparg) - elif op in hasjabs: - pattr = repr(oparg) - elif op in haslocal: - pattr = varnames[oparg] - elif op in hascompare: - pattr = cmp_op[oparg] - elif op in hasfree: - pattr = free[oparg] - - if op in (BUILD_LIST, BUILD_TUPLE, BUILD_SET, BUILD_SLICE, - UNPACK_SEQUENCE, - MAKE_FUNCTION, CALL_FUNCTION, MAKE_CLOSURE, - CALL_FUNCTION_VAR, CALL_FUNCTION_KW, - CALL_FUNCTION_VAR_KW, RAISE_VARARGS - ): - # As of Python 2.5, values loaded via LOAD_CLOSURE are packed into - # a tuple before calling MAKE_CLOSURE. 
- if (op == BUILD_TUPLE and - self.code[self.prev_op[offset]] == LOAD_CLOSURE): - continue - else: - # CALL_FUNCTION OP renaming is done as a custom rule in parse3 - if op_name not in ('CALL_FUNCTION', 'CALL_FUNCTION_VAR', - 'CALL_FUNCTION_VAR_KW', 'CALL_FUNCTION_KW'): - op_name = '%s_%d' % (op_name, oparg) - if op != BUILD_SLICE: - customize[op_name] = oparg - elif op == JUMP_ABSOLUTE: - target = self.get_target(offset) - if target < offset: - if (offset in self.stmts - and self.code[offset+3] not in (END_FINALLY, POP_BLOCK) - and offset not in self.not_continue): - op_name = 'CONTINUE' - else: - op_name = 'JUMP_BACK' - - elif op == LOAD_GLOBAL: - if offset in self.load_asserts: - op_name = 'LOAD_ASSERT' - elif op == RETURN_VALUE: - if offset in self.return_end_ifs: - op_name = 'RETURN_END_IF' - - if offset in self.linestarts: - linestart = self.linestarts[offset] - else: - linestart = None - - if offset not in replace: - tokens.append(Token(op_name, oparg, pattr, offset, linestart)) - else: - tokens.append(Token(replace[offset], oparg, pattr, offset, linestart)) - pass - return tokens, customize - - def build_lines_data(self, code_obj): - """ - Generate various line-related helper data. - """ - # Offset: lineno pairs, only for offsets which start line. 
- # Locally we use list for more convenient iteration using indices - linestarts = list(dis.findlinestarts(code_obj)) - self.linestarts = dict(linestarts) - # Plain set with offsets of first ops on line - self.linestart_offsets = {a for (a, _) in linestarts} - # 'List-map' which shows line number of current op and offset of - # first op on following line, given offset of op as index - self.lines = lines = [] - LineTuple = namedtuple('LineTuple', ['l_no', 'next']) - # Iterate through available linestarts, and fill - # the data for all code offsets encountered until - # last linestart offset - _, prev_line_no = linestarts[0] - offset = 0 - for start_offset, line_no in linestarts[1:]: - while offset < start_offset: - lines.append(LineTuple(prev_line_no, start_offset)) - offset += 1 - prev_line_no = line_no - # Fill remaining offsets with reference to last line number - # and code length as start offset of following non-existing line - codelen = len(self.code) - while offset < codelen: - lines.append(LineTuple(prev_line_no, codelen)) - offset += 1 - - def build_prev_op(self): - """ - Compose 'list-map' which allows to jump to previous - op, given offset of current op as index. - """ - code = self.code - codelen = len(code) - self.prev_op = [0] - for offset in self.op_range(0, codelen): - op = code[offset] - for _ in range(self.op_size(op)): - self.prev_op.append(offset) - - def op_size(self, op): - """ - Return size of operator with its arguments - for given opcode . - """ - if op < dis.HAVE_ARGUMENT: - return 1 - else: - return 3 - - def find_jump_targets(self): - """ - Detect all offsets in a byte code which are jump targets. - - Return the list of offsets. - - This procedure is modelled after dis.findlables(), but here - for each target the number of jumps is counted. 
- """ - code = self.code - codelen = len(code) - self.structs = [{'type': 'root', - 'start': 0, - 'end': codelen-1}] - - # All loop entry points - # self.loops = [] - # Map fixed jumps to their real destination - self.fixed_jumps = {} - self.ignore_if = set() - self.build_statement_indices() - # Containers filled by detect_structure() - self.not_continue = set() - self.return_end_ifs = set() - - targets = {} - for offset in self.op_range(0, codelen): - op = code[offset] - - # Determine structures and fix jumps for 2.3+ - self.detect_structure(offset) - - if op >= dis.HAVE_ARGUMENT: - label = self.fixed_jumps.get(offset) - oparg = code[offset+1] + code[offset+2] * 256 - - if label is None: - if op in dis.hasjrel and op != FOR_ITER: - label = offset + 3 + oparg - elif op in dis.hasjabs: - if op in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP): - if oparg > offset: - label = oparg - - if label is not None and label != -1: - targets[label] = targets.get(label, []) + [offset] - elif op == END_FINALLY and offset in self.fixed_jumps: - label = self.fixed_jumps[offset] - targets[label] = targets.get(label, []) + [offset] - return targets - - # FIXME Create and move to scanner3 - def build_statement_indices(self): - code = self.code - start = 0 - end = codelen = len(code) - - statement_opcodes = { - SETUP_LOOP, BREAK_LOOP, CONTINUE_LOOP, - SETUP_FINALLY, END_FINALLY, SETUP_EXCEPT, SETUP_WITH, - POP_BLOCK, STORE_FAST, DELETE_FAST, STORE_DEREF, - STORE_GLOBAL, DELETE_GLOBAL, STORE_NAME, DELETE_NAME, - STORE_ATTR, DELETE_ATTR, STORE_SUBSCR, DELETE_SUBSCR, - RETURN_VALUE, RAISE_VARARGS, POP_TOP, PRINT_EXPR, - JUMP_ABSOLUTE - } - - statement_opcode_sequences = [(POP_JUMP_IF_FALSE, JUMP_FORWARD), (POP_JUMP_IF_FALSE, JUMP_ABSOLUTE), - (POP_JUMP_IF_TRUE, JUMP_FORWARD), (POP_JUMP_IF_TRUE, JUMP_ABSOLUTE)] - - designator_ops = { - STORE_FAST, STORE_NAME, STORE_GLOBAL, STORE_DEREF, STORE_ATTR, - STORE_SUBSCR, UNPACK_SEQUENCE, JUMP_ABSOLUTE - } - - # Compose preliminary list of indices 
with statements, - # using plain statement opcodes - prelim = self.all_instr(start, end, statement_opcodes) - - # Initialize final container with statements with - # preliminnary data - stmts = self.stmts = set(prelim) - - # Same for opcode sequences - pass_stmts = set() - for sequence in statement_opcode_sequences: - for i in self.op_range(start, end-(len(sequence)+1)): - match = True - for elem in sequence: - if elem != code[i]: - match = False - break - i += self.op_size(code[i]) - - if match is True: - i = self.prev_op[i] - stmts.add(i) - pass_stmts.add(i) - - # Initialize statement list with the full data we've gathered so far - if pass_stmts: - stmt_offset_list = list(stmts) - stmt_offset_list.sort() - else: - stmt_offset_list = prelim - # 'List-map' which contains offset of start of - # next statement, when op offset is passed as index - self.next_stmt = slist = [] - last_stmt_offset = -1 - i = 0 - # Go through all statement offsets - for stmt_offset in stmt_offset_list: - # Process absolute jumps, but do not remove 'pass' statements - # from the set - if code[stmt_offset] == JUMP_ABSOLUTE and stmt_offset not in pass_stmts: - # If absolute jump occurs in forward direction or it takes off from the - # same line as previous statement, this is not a statement - target = self.get_target(stmt_offset) - if target > stmt_offset or self.lines[last_stmt_offset].l_no == self.lines[stmt_offset].l_no: - stmts.remove(stmt_offset) - continue - # Rewing ops till we encounter non-JA one - j = self.prev_op[stmt_offset] - while code[j] == JUMP_ABSOLUTE: - j = self.prev_op[j] - # If we got here, then it's list comprehension which - # is not a statement too - if code[j] == LIST_APPEND: - stmts.remove(stmt_offset) - continue - # Exclude ROT_TWO + POP_TOP - elif code[stmt_offset] == POP_TOP and code[self.prev_op[stmt_offset]] == ROT_TWO: - stmts.remove(stmt_offset) - continue - # Exclude FOR_ITER + designators - elif code[stmt_offset] in designator_ops: - j = 
self.prev_op[stmt_offset] - while code[j] in designator_ops: - j = self.prev_op[j] - if code[j] == FOR_ITER: - stmts.remove(stmt_offset) - continue - # Add to list another list with offset of current statement, - # equal to length of previous statement - slist += [stmt_offset] * (stmt_offset-i) - last_stmt_offset = stmt_offset - i = stmt_offset - # Finish filling the list for last statement - slist += [codelen] * (codelen-len(slist)) - - # FIXME Create and move to scanner3 - def get_target(self, offset): - """ - Get target offset for op located at given . - """ - op = self.code[offset] - target = self.code[offset+1] + self.code[offset+2] * 256 - if op in dis.hasjrel: - target += offset + 3 - return target - - # FIXME Create and move to scanner3 - def detect_structure(self, offset): - """ - Detect structures and their boundaries to fix optimizied jumps - in python2.3+ - """ - code = self.code - op = code[offset] - # Detect parent structure - parent = self.structs[0] - start = parent['start'] - end = parent['end'] - - # Pick inner-most parent for our offset - for struct in self.structs: - curent_start = struct['start'] - curent_end = struct['end'] - if (curent_start <= offset < curent_end) and (curent_start >= start and curent_end <= end): - start = curent_start - end = curent_end - parent = struct - - if op in (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE): - start = offset + self.op_size(op) - target = self.get_target(offset) - rtarget = self.restrict_to_parent(target, parent) - prev_op = self.prev_op - - # Do not let jump to go out of parent struct bounds - if target != rtarget and parent['type'] == 'and/or': - self.fixed_jumps[offset] = rtarget - return - - # Does this jump to right after another cond jump? 
- # If so, it's part of a larger conditional - if (code[prev_op[target]] in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP, - POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE)) and (target > offset): - self.fixed_jumps[offset] = prev_op[target] - self.structs.append({'type': 'and/or', - 'start': start, - 'end': prev_op[target]}) - return - # Is it an and inside if block - if op == POP_JUMP_IF_FALSE: - # Search for other POP_JUMP_IF_FALSE targetting the same op, - # in current statement, starting from current offset, and filter - # everything inside inner 'or' jumps and midline ifs - match = self.rem_or(start, self.next_stmt[offset], POP_JUMP_IF_FALSE, target) - match = self.remove_mid_line_ifs(match) - # If we still have any offsets in set, start working on it - if match: - if (code[prev_op[rtarget]] in (JUMP_FORWARD, JUMP_ABSOLUTE) and prev_op[rtarget] not in self.stmts and - self.restrict_to_parent(self.get_target(prev_op[rtarget]), parent) == rtarget): - if (code[prev_op[prev_op[rtarget]]] == JUMP_ABSOLUTE and self.remove_mid_line_ifs([offset]) and - target == self.get_target(prev_op[prev_op[rtarget]]) and - (prev_op[prev_op[rtarget]] not in self.stmts or self.get_target(prev_op[prev_op[rtarget]]) > prev_op[prev_op[rtarget]]) and - 1 == len(self.remove_mid_line_ifs(self.rem_or(start, prev_op[prev_op[rtarget]], (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE), target)))): - pass - elif (code[prev_op[prev_op[rtarget]]] == RETURN_VALUE and self.remove_mid_line_ifs([offset]) and - 1 == (len(set(self.remove_mid_line_ifs(self.rem_or(start, prev_op[prev_op[rtarget]], - (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE), target))) | - set(self.remove_mid_line_ifs(self.rem_or(start, prev_op[prev_op[rtarget]], - (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE, JUMP_ABSOLUTE), - prev_op[rtarget], True)))))): - pass - else: - fix = None - jump_ifs = self.all_instr(start, self.next_stmt[offset], POP_JUMP_IF_FALSE) - last_jump_good = True - for j in jump_ifs: - if target == self.get_target(j): - if self.lines[j].next == j + 3 
and last_jump_good: - fix = j - break - else: - last_jump_good = False - self.fixed_jumps[offset] = fix or match[-1] - return - else: - self.fixed_jumps[offset] = match[-1] - return - # op == POP_JUMP_IF_TRUE - else: - next = self.next_stmt[offset] - if prev_op[next] == offset: - pass - elif code[next] in (JUMP_FORWARD, JUMP_ABSOLUTE) and target == self.get_target(next): - if code[prev_op[next]] == POP_JUMP_IF_FALSE: - if code[next] == JUMP_FORWARD or target != rtarget or code[prev_op[prev_op[rtarget]]] not in (JUMP_ABSOLUTE, RETURN_VALUE): - self.fixed_jumps[offset] = prev_op[next] - return - elif (code[next] == JUMP_ABSOLUTE and code[target] in (JUMP_ABSOLUTE, JUMP_FORWARD) and - self.get_target(target) == self.get_target(next)): - self.fixed_jumps[offset] = prev_op[next] - return - - # Don't add a struct for a while test, it's already taken care of - if offset in self.ignore_if: - return - - if (code[prev_op[rtarget]] == JUMP_ABSOLUTE and prev_op[rtarget] in self.stmts and - prev_op[rtarget] != offset and prev_op[prev_op[rtarget]] != offset and - not (code[rtarget] == JUMP_ABSOLUTE and code[rtarget+3] == POP_BLOCK and code[prev_op[prev_op[rtarget]]] != JUMP_ABSOLUTE)): - rtarget = prev_op[rtarget] - - # Does the if jump just beyond a jump op, then this is probably an if statement - if code[prev_op[rtarget]] in (JUMP_ABSOLUTE, JUMP_FORWARD): - if_end = self.get_target(prev_op[rtarget]) - - # Is this a loop not an if? 
- if (if_end < prev_op[rtarget]) and (code[prev_op[if_end]] == SETUP_LOOP): - if(if_end > start): - return - - end = self.restrict_to_parent(if_end, parent) - - self.structs.append({'type': 'if-then', - 'start': start, - 'end': prev_op[rtarget]}) - self.not_continue.add(prev_op[rtarget]) - - if rtarget < end: - self.structs.append({'type': 'if-else', - 'start': rtarget, - 'end': end}) - elif code[prev_op[rtarget]] == RETURN_VALUE: - self.structs.append({'type': 'if-then', - 'start': start, - 'end': rtarget}) - self.return_end_ifs.add(prev_op[rtarget]) - - elif op in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP): - target = self.get_target(offset) - if target > offset: - unop_target = self.last_instr(offset, target, JUMP_FORWARD, target) - if unop_target and code[unop_target+3] != ROT_TWO: - self.fixed_jumps[offset] = unop_target - else: - self.fixed_jumps[offset] = self.restrict_to_parent(target, parent) - - # FIXME Create and move to scanner3 - def rem_or(self, start, end, instr, target=None, include_beyond_target=False): - """ - Find offsets of all requested between and , - optionally ing specified offset, and return list found - offsets which are not within any POP_JUMP_IF_TRUE jumps. - """ - # Find all offsets of requested instructions - instr_offsets = self.all_instr(start, end, instr, target, include_beyond_target) - # Get all POP_JUMP_IF_TRUE (or) offsets - pjit_offsets = self.all_instr(start, end, POP_JUMP_IF_TRUE) - filtered = [] - for pjit_offset in pjit_offsets: - pjit_tgt = self.get_target(pjit_offset) - 3 - for instr_offset in instr_offsets: - if instr_offset <= pjit_offset or instr_offset >= pjit_tgt: - filtered.append(instr_offset) - instr_offsets = filtered - filtered = [] - return instr_offsets - - # FIXME Create and move to scanner3 - def remove_mid_line_ifs(self, ifs): - """ - Go through passed offsets, filtering ifs - located somewhere mid-line. 
- """ - filtered = [] - for if_ in ifs: - # For each offset, if line number of current and next op - # is the same - if self.lines[if_].l_no == self.lines[if_+3].l_no: - # Check if last op on line is PJIT or PJIF, and if it is - skip it - if self.code[self.prev_op[self.lines[if_].next]] in (POP_JUMP_IF_TRUE, POP_JUMP_IF_FALSE): - continue - filtered.append(if_) - return filtered + return self.disassemble_generic(co, classname) if __name__ == "__main__": + import inspect co = inspect.currentframe().f_code tokens, customize = Scanner33().disassemble(co) for t in tokens: diff --git a/uncompyle6/scanners/scanner34.py b/uncompyle6/scanners/scanner34.py index 2314eacd..a5a22a94 100644 --- a/uncompyle6/scanners/scanner34.py +++ b/uncompyle6/scanners/scanner34.py @@ -11,34 +11,26 @@ for later use in deparsing. from __future__ import print_function import dis, inspect -from collections import namedtuple from array import array +import uncompyle6.scanners.scanner3 as scan3 from uncompyle6 import PYTHON_VERSION from uncompyle6.scanner import Token -import uncompyle6.opcodes.opcode_34 # Get all the opcodes into globals -JUMP_OPs = uncompyle6.opcodes.opcode_34.JUMP_OPs globals().update(dis.opmap) +import uncompyle6.opcodes.opcode_34 +# verify uses JUMP_OPs from here +JUMP_OPs = uncompyle6.opcodes.opcode_34.JUMP_OPs + from uncompyle6.opcodes.opcode_34 import * -import uncompyle6.scanner as scan -import uncompyle6.scanners.scanner33 as scan33 - - -class Scanner34(scan.Scanner): - def __init__(self): - scan.Scanner.__init__(self, 3.4) # check - - def get_argument(self, bytecode, pos): - arg = bytecode[pos+1] + bytecode[pos+2] * 256 - return arg +class Scanner34(scan3.Scanner3): def disassemble(self, co, classname=None): fn = self.disassemble_built_in if PYTHON_VERSION == 3.4 \ - else self.disassemble_cross_version + else self.disassemble_generic return fn(co, classname) def disassemble_built_in(self, co, classname=None): @@ -167,255 +159,7 @@ class Scanner34(scan.Scanner): pass 
return tokens, {} - # FIXME Create and move to scanner3 - def disassemble_cross_version(self, co, classname=None): - return scan33.Scanner33().disassemble(co, classname) - - # FIXME Create and move to scanner3 - def build_lines_data(self, code_obj): - """ - Generate various line-related helper data. - """ - # Offset: lineno pairs, only for offsets which start line. - # Locally we use list for more convenient iteration using indices - linestarts = list(dis.findlinestarts(code_obj)) - self.linestarts = dict(linestarts) - # Plain set with offsets of first ops on line - self.linestart_offsets = {a for (a, _) in linestarts} - # 'List-map' which shows line number of current op and offset of - # first op on following line, given offset of op as index - self.lines = lines = [] - LineTuple = namedtuple('LineTuple', ['l_no', 'next']) - # Iterate through available linestarts, and fill - # the data for all code offsets encountered until - # last linestart offset - _, prev_line_no = linestarts[0] - offset = 0 - for start_offset, line_no in linestarts[1:]: - while offset < start_offset: - lines.append(LineTuple(prev_line_no, start_offset)) - offset += 1 - prev_line_no = line_no - # Fill remaining offsets with reference to last line number - # and code length as start offset of following non-existing line - codelen = len(self.code) - while offset < codelen: - lines.append(LineTuple(prev_line_no, codelen)) - offset += 1 - - # FIXME Create and move to scanner3 - def build_prev_op(self): - """ - Compose 'list-map' which allows to jump to previous - op, given offset of current op as index. - """ - code = self.code - codelen = len(code) - self.prev_op = [0] - for offset in self.op_range(0, codelen): - op = code[offset] - for _ in range(self.op_size(op)): - self.prev_op.append(offset) - - # FIXME Create and move to scanner3 - def op_size(self, op): - """ - Return size of operator with its arguments - for given opcode . 
- """ - if op < dis.HAVE_ARGUMENT: - return 1 - else: - return 3 - - def find_jump_targets(self): - """ - Detect all offsets in a byte code which are jump targets. - - Return the list of offsets. - - This procedure is modelled after dis.findlables(), but here - for each target the number of jumps is counted. - """ - code = self.code - codelen = len(code) - self.structs = [{'type': 'root', - 'start': 0, - 'end': codelen-1}] - - # All loop entry points - # self.loops = [] - # Map fixed jumps to their real destination - self.fixed_jumps = {} - self.ignore_if = set() - self.build_statement_indices() - # Containers filled by detect_structure() - self.not_continue = set() - self.return_end_ifs = set() - - targets = {} - for offset in self.op_range(0, codelen): - op = code[offset] - - # Determine structures and fix jumps for 2.3+ - self.detect_structure(offset) - - if op >= dis.HAVE_ARGUMENT: - label = self.fixed_jumps.get(offset) - oparg = code[offset+1] + code[offset+2] * 256 - - if label is None: - if op in dis.hasjrel and op != FOR_ITER: - label = offset + 3 + oparg - elif op in dis.hasjabs: - if op in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP): - if oparg > offset: - label = oparg - - if label is not None and label != -1: - targets[label] = targets.get(label, []) + [offset] - elif op == END_FINALLY and offset in self.fixed_jumps: - label = self.fixed_jumps[offset] - targets[label] = targets.get(label, []) + [offset] - return targets - - def build_statement_indices(self): - code = self.code - start = 0 - end = codelen = len(code) - - statement_opcodes = { - SETUP_LOOP, BREAK_LOOP, CONTINUE_LOOP, - SETUP_FINALLY, END_FINALLY, SETUP_EXCEPT, SETUP_WITH, - POP_BLOCK, STORE_FAST, DELETE_FAST, STORE_DEREF, - STORE_GLOBAL, DELETE_GLOBAL, STORE_NAME, DELETE_NAME, - STORE_ATTR, DELETE_ATTR, STORE_SUBSCR, DELETE_SUBSCR, - RETURN_VALUE, RAISE_VARARGS, POP_TOP, PRINT_EXPR, - JUMP_ABSOLUTE - } - - statement_opcode_sequences = [(POP_JUMP_IF_FALSE, JUMP_FORWARD), 
(POP_JUMP_IF_FALSE, JUMP_ABSOLUTE), - (POP_JUMP_IF_TRUE, JUMP_FORWARD), (POP_JUMP_IF_TRUE, JUMP_ABSOLUTE)] - - designator_ops = { - STORE_FAST, STORE_NAME, STORE_GLOBAL, STORE_DEREF, STORE_ATTR, - STORE_SUBSCR, UNPACK_SEQUENCE, JUMP_ABSOLUTE - } - - # Compose preliminary list of indices with statements, - # using plain statement opcodes - prelim = self.all_instr(start, end, statement_opcodes) - - # Initialize final container with statements with - # preliminnary data - stmts = self.stmts = set(prelim) - - # Same for opcode sequences - pass_stmts = set() - for sequence in statement_opcode_sequences: - for i in self.op_range(start, end-(len(sequence)+1)): - match = True - for elem in sequence: - if elem != code[i]: - match = False - break - i += self.op_size(code[i]) - - if match is True: - i = self.prev_op[i] - stmts.add(i) - pass_stmts.add(i) - - # Initialize statement list with the full data we've gathered so far - if pass_stmts: - stmt_offset_list = list(stmts) - stmt_offset_list.sort() - else: - stmt_offset_list = prelim - # 'List-map' which contains offset of start of - # next statement, when op offset is passed as index - self.next_stmt = slist = [] - last_stmt_offset = -1 - i = 0 - # Go through all statement offsets - for stmt_offset in stmt_offset_list: - # Process absolute jumps, but do not remove 'pass' statements - # from the set - if code[stmt_offset] == JUMP_ABSOLUTE and stmt_offset not in pass_stmts: - # If absolute jump occurs in forward direction or it takes off from the - # same line as previous statement, this is not a statement - target = self.get_target(stmt_offset) - if target > stmt_offset or self.lines[last_stmt_offset].l_no == self.lines[stmt_offset].l_no: - stmts.remove(stmt_offset) - continue - # Rewing ops till we encounter non-JA one - j = self.prev_op[stmt_offset] - while code[j] == JUMP_ABSOLUTE: - j = self.prev_op[j] - # If we got here, then it's list comprehension which - # is not a statement too - if code[j] == LIST_APPEND: - 
stmts.remove(stmt_offset) - continue - # Exclude ROT_TWO + POP_TOP - elif code[stmt_offset] == POP_TOP and code[self.prev_op[stmt_offset]] == ROT_TWO: - stmts.remove(stmt_offset) - continue - # Exclude FOR_ITER + designators - elif code[stmt_offset] in designator_ops: - j = self.prev_op[stmt_offset] - while code[j] in designator_ops: - j = self.prev_op[j] - if code[j] == FOR_ITER: - stmts.remove(stmt_offset) - continue - # Add to list another list with offset of current statement, - # equal to length of previous statement - slist += [stmt_offset] * (stmt_offset-i) - last_stmt_offset = stmt_offset - i = stmt_offset - # Finish filling the list for last statement - slist += [codelen] * (codelen-len(slist)) - - # FIXME Create and move to scanner3 - def get_target(self, offset): - """ - Get target offset for op located at given . - """ - op = self.code[offset] - target = self.code[offset+1] + self.code[offset+2] * 256 - if op in dis.hasjrel: - target += offset + 3 - return target - - def next_except_jump(self, start): - """ - Return the next jump that was generated by an except SomeException: - construct in a try...except...else clause or None if not found. 
- """ - - if self.code[start] == DUP_TOP: - except_match = self.first_instr(start, len(self.code), POP_JUMP_IF_FALSE) - if except_match: - jmp = self.prev_op[self.get_target(except_match)] - self.ignore_if.add(except_match) - self.not_continue.add(jmp) - return jmp - - count_END_FINALLY = 0 - count_SETUP_ = 0 - for i in self.op_range(start, len(self.code)): - op = self.code[i] - if op == END_FINALLY: - if count_END_FINALLY == count_SETUP_: - assert self.code[self.prev_op[i]] in (JUMP_ABSOLUTE, JUMP_FORWARD, RETURN_VALUE) - self.not_continue.add(self.prev_op[i]) - return self.prev_op[i] - count_END_FINALLY += 1 - elif op in (SETUP_EXCEPT, SETUP_WITH, SETUP_FINALLY): - count_SETUP_ += 1 - - # FIXME Create and move to scanner3 + # FIXME: merge with scanner3 code def detect_structure(self, offset): """ Detect structures and their boundaries to fix optimizied jumps @@ -598,41 +342,32 @@ class Scanner34(scan.Scanner): else: self.fixed_jumps[offset] = self.restrict_to_parent(target, parent) - def rem_or(self, start, end, instr, target=None, include_beyond_target=False): + def next_except_jump(self, start): """ - Find offsets of all requested between and , - optionally ing specified offset, and return list found - offsets which are not within any POP_JUMP_IF_TRUE jumps. + Return the next jump that was generated by an except SomeException: + construct in a try...except...else clause or None if not found. 
""" - # Find all offsets of requested instructions - instr_offsets = self.all_instr(start, end, instr, target, include_beyond_target) - # Get all POP_JUMP_IF_TRUE (or) offsets - pjit_offsets = self.all_instr(start, end, POP_JUMP_IF_TRUE) - filtered = [] - for pjit_offset in pjit_offsets: - pjit_tgt = self.get_target(pjit_offset) - 3 - for instr_offset in instr_offsets: - if instr_offset <= pjit_offset or instr_offset >= pjit_tgt: - filtered.append(instr_offset) - instr_offsets = filtered - filtered = [] - return instr_offsets - def remove_mid_line_ifs(self, ifs): - """ - Go through passed offsets, filtering ifs - located somewhere mid-line. - """ - filtered = [] - for if_ in ifs: - # For each offset, if line number of current and next op - # is the same - if self.lines[if_].l_no == self.lines[if_+3].l_no: - # Check if last op on line is PJIT or PJIF, and if it is - skip it - if self.code[self.prev_op[self.lines[if_].next]] in (POP_JUMP_IF_TRUE, POP_JUMP_IF_FALSE): - continue - filtered.append(if_) - return filtered + if self.code[start] == DUP_TOP: + except_match = self.first_instr(start, len(self.code), POP_JUMP_IF_FALSE) + if except_match: + jmp = self.prev_op[self.get_target(except_match)] + self.ignore_if.add(except_match) + self.not_continue.add(jmp) + return jmp + + count_END_FINALLY = 0 + count_SETUP_ = 0 + for i in self.op_range(start, len(self.code)): + op = self.code[i] + if op == END_FINALLY: + if count_END_FINALLY == count_SETUP_: + assert self.code[self.prev_op[i]] in (JUMP_ABSOLUTE, JUMP_FORWARD, RETURN_VALUE) + self.not_continue.add(self.prev_op[i]) + return self.prev_op[i] + count_END_FINALLY += 1 + elif op in (SETUP_EXCEPT, SETUP_WITH, SETUP_FINALLY): + count_SETUP_ += 1 if __name__ == "__main__": co = inspect.currentframe().f_code diff --git a/uncompyle6/semantics/fragments.py b/uncompyle6/semantics/fragments.py index 486f0b97..4f73941c 100644 --- a/uncompyle6/semantics/fragments.py +++ b/uncompyle6/semantics/fragments.py @@ -445,18 +445,26 @@ 
class Traverser(pysource.Walker, object): def n_mkfunc(self, node): start = len(self.f.getvalue()) old_name = self.name - if PYTHON3: + if self.version >= 3.0: # LOAD_CONST code object .. - # LOAD_CONST 'x0' + # LOAD_CONST 'x0' if >= 3.3 # MAKE_FUNCTION .. - self.name = node[-2].attr - code_index = -3 + if self.version >= 3.4: + func_name = node[-2].attr + code_index = -3 + elif self.version == 3.3: + func_name = node[-2].pattr + code_index = -3 + else: + func_name = node[-2].attr.co_name + code_index = -2 + pass else: # LOAD_CONST code object .. # MAKE_FUNCTION .. - self.name = node[-2].attr.co_name + func_name = node[-2].attr.co_name code_index = -2 - self.write(self.name) + self.write(func_name) self.indentMore() self.make_function(node, isLambda=False, code_index=code_index) self.name = old_name diff --git a/uncompyle6/semantics/pysource.py b/uncompyle6/semantics/pysource.py index f2909860..85359934 100644 --- a/uncompyle6/semantics/pysource.py +++ b/uncompyle6/semantics/pysource.py @@ -67,7 +67,7 @@ from uncompyle6 import PYTHON3 from uncompyle6.parser import get_python_parser from uncompyle6.parsers.astnode import AST from uncompyle6.parsers.spark import GenericASTTraversal, DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG -from uncompyle6.scanner import Code, get_scanner +from uncompyle6.scanner import Code, GenericPythonCode, get_scanner from uncompyle6.scanners.tok import Token, NoneToken import uncompyle6.parser as python_parser @@ -920,12 +920,21 @@ class Walker(GenericASTTraversal, object): n_importstar = n_importfrom def n_mkfunc(self, node): + if self.version >= 3.0: # LOAD_CONST code object .. # LOAD_CONST 'x0' # MAKE_FUNCTION .. - func_name = node[-2].attr - code_index = -3 + if self.version >= 3.4: + func_name = node[-2].attr + code_index = -3 + elif self.version == 3.3: + func_name = node[-2].pattr + code_index = -3 + else: + func_name = node[-2].attr.co_name + code_index = -2 + pass else: # LOAD_CONST code object .. # MAKE_FUNCTION .. 
@@ -972,9 +981,12 @@ class Walker(GenericASTTraversal, object): self.prec = 27 code = node[code_index].attr + if isinstance(code, GenericPythonCode): + self.write(' for i_am in ["Python 2-3 deparsing limitation"]') + return + assert inspect.iscode(code) code = Code(code, self.scanner, self.currentclass) - # assert isinstance(code, Code) ast = self.build_ast(code._tokens, code._customize) self.customize(code._customize) @@ -1019,6 +1031,10 @@ class Walker(GenericASTTraversal, object): self.prec = 27 code = node[code_index].attr + if isinstance(code, GenericPythonCode): + self.write(' for i_am in ["Python 2-3 deparsing limitation"]') + return + assert inspect.iscode(code) code = Code(code, self.scanner, self.currentclass) # assert isinstance(code, Code) @@ -1438,6 +1454,10 @@ class Walker(GenericASTTraversal, object): defparams = node[:node[-1].attr] code = node[code_index].attr + if isinstance(code, GenericPythonCode): + self.write('(limitation="Cross Python 2/3 deparsing")') + return + assert inspect.iscode(code) code = Code(code, self.scanner, self.currentclass) # assert isinstance(code, Code)