diff --git a/.gitignore b/.gitignore index 266af5fb..88363be6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +*.pyc *_dis *~ *.pyc @@ -9,6 +10,7 @@ /__pkginfo__.pyc /dist /how-to-make-a-release.txt +/nose-*.egg /tmp /uncompyle6.egg-info __pycache__ diff --git a/README.rst b/README.rst index 43e9ad47..c2c1f767 100644 --- a/README.rst +++ b/README.rst @@ -84,14 +84,18 @@ for usage help. Known Bugs/Restrictions ----------------------- -Python 2 deparsing decompiles about the first 140 or so of the Python -2.7.10 and 2.7.11 standard library files and all but less that 10% -verify. So as such, it is probably a little better than uncompyle2. -Other Python 2 versions do worse. +Python 2 deparsing decompiles all of the Python 2.7.10 and +2.7.11 installed packages I have on my system; more than 90% verify +ok. Some of the verification failures may be bugs in the verification process. So +as such, it is probably a little better than uncompyle2. Other Python +2 versions do worse. -Python 3 deparsing before 3.5 is okay, but even there, more work is needed to -decompile all of its library. Python 3.5 is missing some of new -opcodes and idioms added, but it still often works. +More than 90% of the Python 3.3 and 3.4 packages that I have +installed on my system deparse. Python 3.2 fares a little worse, at just under +90%. (Each Python version has about 200 bytecode files.) All of the +bytecode that deparses also verifies. Python 3.5 is more problematic and is +missing some of the new opcodes and idioms added, but it still often +works. There is lots to do, so please dig in and help.
diff --git a/__pkginfo__.py b/__pkginfo__.py index 83404a06..e53e32f7 100644 --- a/__pkginfo__.py +++ b/__pkginfo__.py @@ -37,7 +37,7 @@ entry_points={ ]} ftp_url = None install_requires = ['spark-parser >= 1.2.1', - 'xdis >= 1.1.0'] + 'xdis >= 1.1.1'] license = 'MIT' mailing_list = 'python-debugger@googlegroups.com' modname = 'uncompyle6' diff --git a/test/Makefile b/test/Makefile index 143f8b36..5f45e985 100644 --- a/test/Makefile +++ b/test/Makefile @@ -20,7 +20,7 @@ check: $(MAKE) check-$$PYTHON_VERSION #: Run working tests from Python 2.6 or 2.7 -check-2.6 check-2.7: check-bytecode check-2.7-ok +check-2.6 check-2.7: check-bytecode-2 check-bytecode-3 check-2.7-ok #: Run working tests from Python 3.2 check-3.2: check-bytecode @@ -41,14 +41,21 @@ check-3.4: check-bytecode check-3.4-ok check-2.7-ok check-disasm: $(PYTHON) dis-compare.py -#: Check deparsing bytecode only +#: Check deparsing bytecode 2.x only check-bytecode-2: + $(PYTHON) test_pythonlib.py --bytecode-2.3 --bytecode-2.5 --bytecode-2.6 --bytecode-2.7 + +#: Check deparsing bytecode 3.x only +check-bytecode-3: + $(PYTHON) test_pythonlib.py --bytecode-3.2 --bytecode-3.3 --bytecode-3.4 --bytecode-3.5 + +#: Check deparsing bytecode that works running Python 2 and Python 3 +check-bytecode: check-bytecode-3 $(PYTHON) test_pythonlib.py --bytecode-2.5 --bytecode-2.6 --bytecode-2.7 -#: Check deparsing bytecode only -check-bytecode: - $(PYTHON) test_pythonlib.py --bytecode-2.5 --bytecode-2.6 --bytecode-2.7 \ - --bytecode-3.2 --bytecode-3.3 --bytecode-3.4 --bytecode-3.5 +#: Check deparsing Python 2.3 +check-bytecode-2.3: + $(PYTHON) test_pythonlib.py --bytecode-2.3 #: Check deparsing Python 2.5 check-bytecode-2.5: diff --git a/test/bytecode_2.3/00_assign.pyc b/test/bytecode_2.3/00_assign.pyc new file mode 100644 index 00000000..4edff968 Binary files /dev/null and b/test/bytecode_2.3/00_assign.pyc differ diff --git a/test/bytecode_2.3/00_import.pyc b/test/bytecode_2.3/00_import.pyc new file mode 100644 index 
# Bug from python2.6/SimpleXMLRPCServer.py
# The problem in 2.6 is handling this instruction sequence:
#
#     72 JUMP_ABSOLUTE    17 (to 17)
#     75 POP_TOP
#     76 JUMP_ABSOLUTE    17 (to 17)
#
# and getting it to parse with:
#
#     list_for  ::= expr _for designator list_iter JUMP_BACK
#     list_iter ::= list_if JUMP_BACK
#                           ^^^^^^^^^ added to 2.6 grammar
#     list_iter ::= list_for
#
# NOTE(review): this file is a decompiler test fixture; the compiled form
# (test/bytecode_2.6/06_list_ifnot_and.pyc) is checked in alongside it, so
# the code below should keep its exact shape -- a list comprehension whose
# filter is "not A and B".


# Return the names in dir(obj) that do not start with '_' and whose
# attribute value exposes a __call__ attribute.
def list_public_methods(obj):
    return [member for member in dir(obj)
            if not member.startswith('_') and
            hasattr(getattr(obj, member), '__call__')]
6e759be1..a21232ed 100755 --- a/test/test_pythonlib.py +++ b/test/test_pythonlib.py @@ -78,7 +78,7 @@ for vers in (2.7, 3.4, 3.5): test_options[key] = (os.path.join(src_dir, pythonlib), PYOC, key, vers) pass -for vers in (2.5, 2.6, 2.7, 3.2, 3.3, 3.4, 3.5): +for vers in (2.3, 2.5, 2.6, 2.7, 3.2, 3.3, 3.4, 3.5): bytecode = "bytecode_%s" % vers key = "bytecode-%s" % vers test_options[key] = (bytecode, PYC, bytecode, vers) diff --git a/uncompyle6/parser.py b/uncompyle6/parser.py index c3439154..0dd97fe4 100644 --- a/uncompyle6/parser.py +++ b/uncompyle6/parser.py @@ -463,11 +463,27 @@ def get_python_parser(version, debug_parser, compile_mode='exec'): # FIXME: there has to be a better way... if version < 3.0: - import uncompyle6.parsers.parse2 as parse2 - if compile_mode == 'exec': - p = parse2.Python2Parser(debug_parser) + if version == 2.3: + import uncompyle6.parsers.parse23 as parse23 + if compile_mode == 'exec': + p = parse23.Python23Parser(debug_parser) + else: + p = parse23.Python23ParserSingle(debug_parser) + elif version == 2.6: + import uncompyle6.parsers.parse26 as parse26 + if compile_mode == 'exec': + p = parse26.Python26Parser(debug_parser) + else: + p = parse26.Python26ParserSingle(debug_parser) else: - p = parse2.Python2ParserSingle(debug_parser) + import uncompyle6.parsers.parse2 as parse2 + if compile_mode == 'exec': + p = parse2.Python2Parser(debug_parser) + else: + p = parse2.Python2ParserSingle(debug_parser) + pass + pass + pass else: import uncompyle6.parsers.parse3 as parse3 if version == 3.2: diff --git a/uncompyle6/parsers/parse2.py b/uncompyle6/parsers/parse2.py index 13ebba37..075e66d2 100644 --- a/uncompyle6/parsers/parse2.py +++ b/uncompyle6/parsers/parse2.py @@ -1,9 +1,6 @@ -# Copyright (c) 1999 John Aycock -# Copyright (c) 2000-2002 by hartmut Goebel -# Copyright (c) 2005 by Dan Pascu # Copyright (c) 2015 Rocky Bernstein -# -# See LICENSE for license +# Copyright (c) 2000-2002 by hartmut Goebel +# Copyright (c) 1999 John Aycock """ A 
# ---- uncompyle6/parsers/parse23.py ----------------------------------------
# Copyright (c) 2016 Rocky Bernstein
# Copyright (c) 2005 by Dan Pascu
# Copyright (c) 2000-2002 by hartmut Goebel
# Copyright (c) 1999 John Aycock
"""
A spark grammar for Python 2.3 bytecode.
"""

from spark_parser import GenericASTBuilder, DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG
from uncompyle6.parsers.astnode import AST
from uncompyle6.parser import PythonParserSingle, ParserError, nop_func


class Python23Parser(GenericASTBuilder):
    """Grammar-driven AST builder for Python 2.3 token streams.

    The docstring of every ``p_*`` method below is grammar input, not
    documentation: keep prose out of those docstrings and describe the rules
    in ``#`` comments instead.
    """

    def __init__(self, debug_parser=PARSER_DEFAULT_DEBUG):
        GenericASTBuilder.__init__(self, AST, 'stmts', debug=debug_parser)
        # Customization keys for which add_custom_rules() already added rules.
        self.customized = {}

    def cleanup(self):
        """
        Remove recursive references to allow garbage
        collector to collect this object.
        """
        for table in (self.rule2func, self.rules, self.rule2name, self.first):
            # Snapshot the keys so the table is never iterated while written.
            for key in list(table):
                table[key] = None
        # NOTE(review): dir(self) also yields dunder names such as
        # __class__; confirm that assigning None to every one is accepted.
        for name in dir(self):
            setattr(self, name, None)

    def error(self, token):
        """Raise ParserError for `token`, reporting the token's offset."""
        raise ParserError(token, token.offset)

    def typestring(self, token):
        """Return the grammar symbol for `token` (its ``type`` attribute)."""
        return token.type

    # Function definitions and closure loading.
    def p_funcdef(self, args):
        '''
        stmt ::= funcdef
        funcdef ::= mkfunc designator
        load_closure ::= load_closure LOAD_CLOSURE
        load_closure ::= LOAD_CLOSURE
        '''

    # List comprehensions.
    def p_list_comprehension(self, args):
        '''
        expr ::= list_compr
        list_compr ::= BUILD_LIST_0 DUP_TOP _load_attr
                designator list_iter del_stmt

        list_iter ::= list_for
        list_iter ::= list_if
        list_iter ::= lc_body

        _load_attr ::= LOAD_ATTR
        _load_attr ::=

        _lcfor ::= GET_ITER LIST_COMPREHENSION_START FOR_ITER
        _lcfor ::= LOAD_CONST FOR_LOOP
        _lcfor2 ::= GET_ITER FOR_ITER
        _lcfor2 ::= LOAD_CONST FOR_LOOP

        list_for ::= expr _lcfor designator list_iter
                LIST_COMPREHENSION_END JUMP_ABSOLUTE

        list_for ::= expr _lcfor2 designator list_iter
                JUMP_ABSOLUTE

        list_if ::= expr condjmp IF_THEN_START list_iter
                IF_THEN_END _jump POP_TOP IF_ELSE_START IF_ELSE_END

        lc_body ::= LOAD_NAME expr CALL_FUNCTION_1 POP_TOP
        lc_body ::= LOAD_FAST expr CALL_FUNCTION_1 POP_TOP
        lc_body ::= LOAD_NAME expr LIST_APPEND
        lc_body ::= LOAD_FAST expr LIST_APPEND
        '''

    # Augmented assignment (+=, -=, ...).
    def p_augmented_assign(self, args):
        '''
        stmt ::= augassign1
        stmt ::= augassign2
        augassign1 ::= expr expr inplace_op designator
        augassign1 ::= expr expr inplace_op ROT_THREE STORE_SUBSCR
        augassign1 ::= expr expr inplace_op ROT_TWO STORE_SLICE+0
        augassign1 ::= expr expr inplace_op ROT_THREE STORE_SLICE+1
        augassign1 ::= expr expr inplace_op ROT_THREE STORE_SLICE+2
        augassign1 ::= expr expr inplace_op ROT_FOUR STORE_SLICE+3
        augassign2 ::= expr DUP_TOP LOAD_ATTR expr
                inplace_op ROT_TWO STORE_ATTR

        inplace_op ::= INPLACE_ADD
        inplace_op ::= INPLACE_SUBTRACT
        inplace_op ::= INPLACE_MULTIPLY
        inplace_op ::= INPLACE_DIVIDE
        inplace_op ::= INPLACE_TRUE_DIVIDE
        inplace_op ::= INPLACE_FLOOR_DIVIDE
        inplace_op ::= INPLACE_MODULO
        inplace_op ::= INPLACE_POWER
        inplace_op ::= INPLACE_LSHIFT
        inplace_op ::= INPLACE_RSHIFT
        inplace_op ::= INPLACE_AND
        inplace_op ::= INPLACE_XOR
        inplace_op ::= INPLACE_OR
        '''

    # Plain and chained assignment.
    def p_assign(self, args):
        '''
        stmt ::= assign
        assign ::= expr DUP_TOP designList
        assign ::= expr designator
        '''

    # print statements.
    def p_print(self, args):
        '''
        stmt ::= print_stmt
        stmt ::= print_stmt_nl
        stmt ::= print_nl_stmt
        print_stmt ::= expr PRINT_ITEM
        print_nl_stmt ::= PRINT_NEWLINE
        print_stmt_nl ::= print_stmt print_nl_stmt
        '''

    # "print >> file, ..." statements:
    #   expr   print_to*   POP_TOP
    #   expr { print_to* } PRINT_NEWLINE_TO
    def p_print_to(self, args):
        '''
        stmt ::= print_to
        stmt ::= print_to_nl
        stmt ::= print_nl_to
        print_to ::= expr print_to_items POP_TOP
        print_to_nl ::= expr print_to_items PRINT_NEWLINE_TO
        print_nl_to ::= expr PRINT_NEWLINE_TO
        print_to_items ::= print_to_items print_to_item
        print_to_items ::= print_to_item
        print_to_item ::= DUP_TOP expr ROT_TWO PRINT_ITEM_TO
        '''

    # Imports in the form that has no leading LOAD_CONST.
    def p_import15(self, args):
        '''
        stmt ::= importstmt
        stmt ::= importfrom

        importstmt ::= IMPORT_NAME STORE_FAST
        importstmt ::= IMPORT_NAME STORE_NAME

        importfrom ::= IMPORT_NAME importlist POP_TOP
        importlist ::= importlist IMPORT_FROM
        importlist ::= IMPORT_FROM
        '''

    # Python 2.0 - 2.3 imports
    def p_import20_23(self, args):
        '''
        stmt ::= importstmt20
        stmt ::= importfrom20
        stmt ::= importstar20

        importstmt20 ::= LOAD_CONST import_as
        importstar20 ::= LOAD_CONST IMPORT_NAME IMPORT_STAR

        importfrom20 ::= LOAD_CONST IMPORT_NAME importlist20 POP_TOP
        importlist20 ::= importlist20 import_as
        importlist20 ::= import_as
        import_as ::= IMPORT_NAME designator
        import_as ::= IMPORT_NAME LOAD_ATTR designator
        import_as ::= IMPORT_FROM designator
        '''

    # Statements: store targets, control flow, try/except, loops.
    def p_grammar(self, args):
        '''
        stmts ::= stmts stmt
        stmts ::= stmt

        stmts_opt ::= stmts
        stmts_opt ::= passstmt
        passstmt ::=

        designList ::= designator designator
        designList ::= designator DUP_TOP designList

        designator ::= STORE_FAST
        designator ::= STORE_NAME
        designator ::= STORE_GLOBAL
        designator ::= STORE_DEREF
        designator ::= expr STORE_ATTR
        designator ::= expr STORE_SLICE+0
        designator ::= expr expr STORE_SLICE+1
        designator ::= expr expr STORE_SLICE+2
        designator ::= expr expr expr STORE_SLICE+3
        designator ::= store_subscr
        store_subscr ::= expr expr STORE_SUBSCR
        designator ::= unpack
        designator ::= unpack_list

        stmt ::= classdef
        stmt ::= call_stmt
        call_stmt ::= expr POP_TOP

        stmt ::= return_stmt
        return_stmt ::= expr RETURN_VALUE

        stmt ::= yield_stmt
        yield_stmt ::= expr YIELD_STMT
        yield_stmt ::= expr YIELD_VALUE

        stmt ::= break_stmt
        break_stmt ::= BREAK_LOOP

        stmt ::= continue_stmt
        continue_stmt ::= JUMP_ABSOLUTE
        continue_stmt ::= CONTINUE_LOOP

        stmt ::= raise_stmt
        raise_stmt ::= exprlist RAISE_VARARGS
        raise_stmt ::= nullexprlist RAISE_VARARGS

        stmt ::= exec_stmt
        exec_stmt ::= expr exprlist DUP_TOP EXEC_STMT
        exec_stmt ::= expr exprlist EXEC_STMT

        stmt ::= assert
        stmt ::= assert2
        stmt ::= assert3
        stmt ::= assert4
        stmt ::= ifstmt
        stmt ::= ifelsestmt
        stmt ::= whilestmt
        stmt ::= while1stmt
        stmt ::= while12stmt
        stmt ::= whileelsestmt
        stmt ::= while1elsestmt
        stmt ::= while12elsestmt
        stmt ::= forstmt
        stmt ::= forelsestmt
        stmt ::= trystmt
        stmt ::= tryfinallystmt

        stmt ::= del_stmt
        del_stmt ::= DELETE_FAST
        del_stmt ::= DELETE_NAME
        del_stmt ::= DELETE_GLOBAL
        del_stmt ::= expr DELETE_SLICE+0
        del_stmt ::= expr expr DELETE_SLICE+1
        del_stmt ::= expr expr DELETE_SLICE+2
        del_stmt ::= expr expr expr DELETE_SLICE+3
        del_stmt ::= delete_subscr
        delete_subscr ::= expr expr DELETE_SUBSCR
        del_stmt ::= expr DELETE_ATTR

        kwarg ::= LOAD_CONST expr

        classdef ::= LOAD_CONST expr mkfunc
                CALL_FUNCTION_0 BUILD_CLASS designator

        condjmp ::= JUMP_IF_FALSE POP_TOP
        condjmp ::= JUMP_IF_TRUE POP_TOP

        assert ::= expr JUMP_IF_FALSE POP_TOP
                LOGIC_TEST_START expr JUMP_IF_TRUE POP_TOP
                LOGIC_TEST_START LOAD_GLOBAL RAISE_VARARGS
                LOGIC_TEST_END LOGIC_TEST_END POP_TOP
        assert2 ::= expr JUMP_IF_FALSE POP_TOP
                LOGIC_TEST_START expr JUMP_IF_TRUE POP_TOP
                LOGIC_TEST_START LOAD_GLOBAL expr RAISE_VARARGS
                LOGIC_TEST_END LOGIC_TEST_END POP_TOP
        assert3 ::= expr JUMP_IF_TRUE POP_TOP
                LOGIC_TEST_START LOAD_GLOBAL RAISE_VARARGS
                LOGIC_TEST_END POP_TOP
        assert4 ::= expr JUMP_IF_TRUE POP_TOP
                LOGIC_TEST_START LOAD_GLOBAL expr RAISE_VARARGS
                LOGIC_TEST_END POP_TOP

        _jump ::= JUMP_ABSOLUTE
        _jump ::= JUMP_FORWARD

        ifstmt ::= expr condjmp
                IF_THEN_START stmts_opt IF_THEN_END
                _jump POP_TOP IF_ELSE_START IF_ELSE_END

        ifelsestmt ::= expr condjmp
                IF_THEN_START stmts_opt IF_THEN_END
                _jump POP_TOP IF_ELSE_START stmts IF_ELSE_END

        trystmt ::= SETUP_EXCEPT TRY_START stmts_opt
                TRY_END POP_BLOCK _jump
                except_stmt

        try_end ::= END_FINALLY TRY_ELSE_START TRY_ELSE_END
        try_end ::= except_else
        except_else ::= END_FINALLY TRY_ELSE_START stmts TRY_ELSE_END

        except_stmt ::= except_cond except_stmt
        except_stmt ::= except_conds try_end
        except_stmt ::= except try_end
        except_stmt ::= try_end

        except_conds ::= except_cond except_conds
        except_conds ::=

        except_cond ::= except_cond1
        except_cond ::= except_cond2
        except_cond1 ::= EXCEPT_START DUP_TOP expr COMPARE_OP
                JUMP_IF_FALSE
                POP_TOP POP_TOP POP_TOP POP_TOP
                stmts_opt EXCEPT_END _jump POP_TOP
        except_cond2 ::= EXCEPT_START DUP_TOP expr COMPARE_OP
                JUMP_IF_FALSE
                POP_TOP POP_TOP designator POP_TOP
                stmts_opt EXCEPT_END _jump POP_TOP
        except ::= EXCEPT_START POP_TOP POP_TOP POP_TOP
                stmts_opt EXCEPT_END _jump

        tryfinallystmt ::= SETUP_FINALLY stmts_opt
                POP_BLOCK LOAD_CONST
                stmts_opt END_FINALLY

        _while1test ::= _jump JUMP_IF_FALSE POP_TOP
        _while1test ::=

        whilestmt ::= SETUP_LOOP WHILE_START
                expr condjmp
                stmts_opt WHILE_END JUMP_ABSOLUTE
                WHILE_ELSE_START POP_TOP POP_BLOCK WHILE_ELSE_END

        while1stmt ::= SETUP_LOOP _while1test WHILE1_START
                stmts_opt WHILE1_END JUMP_ABSOLUTE
                WHILE1_ELSE_START POP_TOP POP_BLOCK WHILE1_ELSE_END

        while12stmt ::= SETUP_LOOP WHILE1_START
                _jump JUMP_IF_FALSE POP_TOP
                stmts_opt WHILE1_END JUMP_ABSOLUTE
                WHILE1_ELSE_START POP_TOP POP_BLOCK WHILE1_ELSE_END

        whileelsestmt ::= SETUP_LOOP WHILE_START
                expr condjmp
                stmts_opt WHILE_END JUMP_ABSOLUTE
                WHILE_ELSE_START POP_TOP POP_BLOCK
                stmts WHILE_ELSE_END

        while1elsestmt ::= SETUP_LOOP _while1test WHILE1_START
                stmts_opt WHILE1_END JUMP_ABSOLUTE
                WHILE1_ELSE_START POP_TOP POP_BLOCK
                stmts WHILE1_ELSE_END

        while12elsestmt ::= SETUP_LOOP WHILE1_START
                _jump JUMP_IF_FALSE POP_TOP
                stmts_opt WHILE1_END JUMP_ABSOLUTE
                WHILE1_ELSE_START POP_TOP POP_BLOCK
                stmts WHILE1_ELSE_END

        _for ::= GET_ITER FOR_START FOR_ITER
        _for ::= LOAD_CONST FOR_LOOP

        forstmt ::= SETUP_LOOP expr _for designator
                stmts_opt FOR_END JUMP_ABSOLUTE
                FOR_ELSE_START POP_BLOCK FOR_ELSE_END
        forelsestmt ::= SETUP_LOOP expr _for designator
                stmts_opt FOR_END JUMP_ABSOLUTE
                FOR_ELSE_START POP_BLOCK stmts FOR_ELSE_END

        '''

    # Expressions: loads, operators, slices, boolean logic, comparisons, maps.
    def p_expr(self, args):
        '''
        expr ::= load_closure mklambda
        expr ::= mklambda
        expr ::= SET_LINENO
        expr ::= LOAD_FAST
        expr ::= LOAD_NAME
        expr ::= LOAD_CONST
        expr ::= LOAD_GLOBAL
        expr ::= LOAD_DEREF
        expr ::= LOAD_LOCALS
        expr ::= expr LOAD_ATTR
        expr ::= binary_expr
        expr ::= build_list

        binary_expr ::= expr expr binary_op
        binary_op ::= BINARY_ADD
        binary_op ::= BINARY_SUBTRACT
        binary_op ::= BINARY_MULTIPLY
        binary_op ::= BINARY_DIVIDE
        binary_op ::= BINARY_TRUE_DIVIDE
        binary_op ::= BINARY_FLOOR_DIVIDE
        binary_op ::= BINARY_MODULO
        binary_op ::= BINARY_LSHIFT
        binary_op ::= BINARY_RSHIFT
        binary_op ::= BINARY_AND
        binary_op ::= BINARY_OR
        binary_op ::= BINARY_XOR
        binary_op ::= BINARY_POWER

        expr ::= binary_subscr
        binary_subscr ::= expr expr BINARY_SUBSCR
        expr ::= expr expr DUP_TOPX_2 BINARY_SUBSCR
        expr ::= cmp
        expr ::= expr UNARY_POSITIVE
        expr ::= expr UNARY_NEGATIVE
        expr ::= expr UNARY_CONVERT
        expr ::= expr UNARY_INVERT
        expr ::= expr UNARY_NOT
        expr ::= mapexpr
        expr ::= expr SLICE+0
        expr ::= expr expr SLICE+1
        expr ::= expr expr SLICE+2
        expr ::= expr expr expr SLICE+3
        expr ::= expr DUP_TOP SLICE+0
        expr ::= expr expr DUP_TOPX_2 SLICE+1
        expr ::= expr expr DUP_TOPX_2 SLICE+2
        expr ::= expr expr expr DUP_TOPX_3 SLICE+3
        expr ::= and
        expr ::= and2
        expr ::= or
        or ::= expr JUMP_IF_TRUE POP_TOP LOGIC_TEST_START expr LOGIC_TEST_END
        and ::= expr JUMP_IF_FALSE POP_TOP LOGIC_TEST_START expr LOGIC_TEST_END
        and2 ::= _jump JUMP_IF_FALSE POP_TOP LOGIC_TEST_START expr LOGIC_TEST_END

        cmp ::= cmp_list
        cmp ::= compare
        compare ::= expr expr COMPARE_OP
        cmp_list ::= expr cmp_list1 ROT_TWO IF_ELSE_START POP_TOP
                IF_ELSE_END
        cmp_list1 ::= expr DUP_TOP ROT_THREE
                COMPARE_OP JUMP_IF_FALSE POP_TOP
                cmp_list1
        cmp_list1 ::= expr DUP_TOP ROT_THREE
                COMPARE_OP JUMP_IF_FALSE POP_TOP
                IF_THEN_START cmp_list1
        cmp_list1 ::= expr DUP_TOP ROT_THREE
                COMPARE_OP JUMP_IF_FALSE POP_TOP
                IF_THEN_START cmp_list2
        cmp_list1 ::= expr DUP_TOP ROT_THREE
                COMPARE_OP JUMP_IF_FALSE POP_TOP
                cmp_list2
        cmp_list2 ::= expr COMPARE_OP IF_THEN_END JUMP_FORWARD
        mapexpr ::= BUILD_MAP kvlist

        kvlist ::= kvlist kv
        kvlist ::= kvlist kv2
        kvlist ::=

        kv ::= DUP_TOP expr ROT_TWO expr STORE_SUBSCR
        kv2 ::= DUP_TOP expr expr ROT_THREE STORE_SUBSCR

        exprlist ::= exprlist expr
        exprlist ::= expr

        nullexprlist ::=
        '''

    def nonterminal(self, nt, args):
        """Build the node for nonterminal `nt` from its child nodes `args`.

        For the left-recursive list nonterminals ('stmts', 'exprlist',
        'kvlist') the second child is appended to the first child's node and
        that node is returned, so repeated items end up in one flat node.
        Everything else is delegated to GenericASTBuilder.
        """
        collect = ('stmts', 'exprlist', 'kvlist')

        if nt in collect and len(args) > 1:
            # Collect iterated thingies together.
            rv = args[0]
            rv.append(args[1])
        else:
            rv = GenericASTBuilder.nonterminal(self, nt, args)
        return rv

    def __ambiguity(self, children):
        # only for debugging! to be removed hG/2000-10-15
        # print(x) with a single argument behaves the same under Python 2's
        # print statement and Python 3's print function; the former
        # "print children" made this module a SyntaxError on Python 3.
        print(children)
        return GenericASTBuilder.ambiguity(self, children)

    def resolve(self, list):
        """Choose between ambiguous rule names in `list`.

        A two-way choice between 'funcdef' and 'assign' is settled in favour
        of 'funcdef'; anything else is left to GenericASTBuilder.resolve().
        The parameter name shadows the builtin but is kept so the signature
        stays unchanged for callers.
        """
        if len(list) == 2 and 'funcdef' in list and 'assign' in list:
            return 'funcdef'
        return GenericASTBuilder.resolve(self, list)

    def add_custom_rules(self, tokens, customize):
        """
        Special handling for opcodes that take a variable number
        of arguments -- we add a new rule for each entry of `customize`,
        a mapping from a customized opcode name such as 'BUILD_LIST_3' to
        its integer argument n:

            build_list  ::= {expr}^n BUILD_LIST_n
            build_list  ::= {expr}^n BUILD_TUPLE_n
            expr        ::= {expr}^n BUILD_SLICE_n
            unpack_list ::= UNPACK_LIST_n {designator}^n
            unpack      ::= UNPACK_TUPLE_n {designator}^n
            unpack      ::= UNPACK_SEQUENCE_n {designator}^n
            mklambda    ::= {expr}^n LOAD_LAMBDA MAKE_FUNCTION_n
            mkfunc      ::= {expr}^n LOAD_CONST MAKE_FUNCTION_n
            mklambda    ::= {expr}^n load_closure LOAD_LAMBDA MAKE_CLOSURE_n
            mkfunc      ::= {expr}^n load_closure LOAD_CONST MAKE_CLOSURE_n
            expr        ::= expr {expr}^na {kwarg}^nk {expr}^nak CALL_FUNCTION*_n

        where, for calls, na is the low byte of n, nk the next byte, and nak
        is 0, 1 or 2 extra expressions for the plain, _VAR or _KW, and
        _VAR_KW variants.  DUP_TOPX_n needs no rule.  `tokens` is not used.

        Raises TypeError for a customization key that is not recognized.
        """
        for k, v in customize.items():
            # avoid adding the same rule twice to this parser
            if k in self.customized:
                continue
            self.customized[k] = None

            # Strip the trailing "_<n>" to recover the base opcode name.
            op = k[:k.rfind('_')]
            if op in ('BUILD_LIST', 'BUILD_TUPLE'):
                rule = 'build_list ::= ' + 'expr '*v + k
            elif op == 'BUILD_SLICE':
                rule = 'expr ::= ' + 'expr '*v + k
            elif op in ('UNPACK_TUPLE', 'UNPACK_SEQUENCE'):
                rule = 'unpack ::= ' + k + ' designator'*v
            elif op == 'UNPACK_LIST':
                rule = 'unpack_list ::= ' + k + ' designator'*v
            elif op == 'DUP_TOPX':
                # no need to add a rule
                continue
            elif op == 'MAKE_FUNCTION':
                self.addRule('mklambda ::= %s LOAD_LAMBDA %s' %
                             ('expr '*v, k), nop_func)
                rule = 'mkfunc ::= %s LOAD_CONST %s' % ('expr '*v, k)
            elif op == 'MAKE_CLOSURE':
                self.addRule('mklambda ::= %s load_closure LOAD_LAMBDA %s' %
                             ('expr '*v, k), nop_func)
                rule = 'mkfunc ::= %s load_closure LOAD_CONST %s' % ('expr '*v, k)
            elif op in ('CALL_FUNCTION', 'CALL_FUNCTION_VAR',
                        'CALL_FUNCTION_VAR_KW', 'CALL_FUNCTION_KW'):
                na = (v & 0xff)           # positional parameters
                nk = (v >> 8) & 0xff      # keyword parameters
                # number of apply equiv arguments; floor division keeps this
                # an int on Python 3 so it can be used as a repeat count
                nak = (len(op) - len('CALL_FUNCTION')) // 3
                rule = 'expr ::= expr ' + 'expr '*na + 'kwarg '*nk \
                       + 'expr ' * nak + k
            else:
                # The former string exception already surfaced as a
                # TypeError (strings cannot be raised); keep that type but
                # deliver the intended message.
                raise TypeError('unknown customize token %s' % k)
            self.addRule(rule, nop_func)


class Python23ParserSingle(Python23Parser, PythonParserSingle):
    """Python 2.3 grammar combined with PythonParserSingle."""
    pass

# local variables:
# tab-width: 4

# ---- uncompyle6/parsers/parse26.py ----------------------------------------
# Copyright (c) 2016 Rocky Bernstein
"""
spark grammar differences over Python2 for Python 2.6.
"""

from uncompyle6.parser import PythonParserSingle
from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG
from uncompyle6.parsers.parse2 import Python2Parser


class Python26Parser(Python2Parser):
    """Python2Parser plus the grammar rules specific to Python 2.6."""

    def __init__(self, debug_parser=PARSER_DEFAULT_DEBUG):
        super(Python26Parser, self).__init__(debug_parser)
        self.customized = {}

    # NOTE(review): the method name looks like a typo for p_list_iter; it is
    # left unchanged because a base class may already define a method of
    # that name, which a rename would then override -- confirm first.
    def p_lis_iter(self, args):
        '''
        list_iter ::= list_if JUMP_BACK
        '''


# Derive from Python26Parser (not Python2Parser) so that single-mode parsing
# also gets the 2.6-specific list_iter rule defined above.
class Python26ParserSingle(Python26Parser, PythonParserSingle):
    pass
def get_scanner(version, show_asm=None):
    """Return a scanner instance for bytecode of Python `version`.

    `version` is a float such as 2.7 or 3.4 and must be one of
    PYTHON_VERSIONS.  The scanner class is looked up by naming convention:
    module ``uncompyle6.scanners.scanner<NN>``, class ``Scanner<NN>``, where
    NN is the version without the dot (2.7 -> "27").  `show_asm` is passed
    through to the scanner's constructor as a keyword argument.

    Raises RuntimeError if `version` is not supported.
    """
    if version not in PYTHON_VERSIONS:
        raise RuntimeError("Unsupported Python version %s" % version)

    v_str = "%s" % (int(version * 10))
    class_name = "Scanner%s" % v_str
    # A non-empty fromlist makes __import__ return the leaf module itself
    # rather than the top-level package.  This works the same way on every
    # Python 2 and 3 release, so no exec/eval or per-major-version branch is
    # needed to bind the module and instantiate the class.
    scan = __import__("uncompyle6.scanners.scanner%s" % v_str,
                      fromlist=[class_name])
    return getattr(scan, class_name)(show_asm=show_asm)


if __name__ == "__main__":
    import inspect, uncompyle6
    co = inspect.currentframe().f_code
    # NOTE(review): scanners that compare show_asm against 'before',
    # 'after' or 'both' will print nothing for True -- confirm the value
    # intended for this demo.
    scanner = get_scanner(uncompyle6.PYTHON_VERSION, True)
    tokens, customize = scanner.disassemble(co, {})
""" - ## FIXME: DRY with disassemble_native - - # import dis; dis.disassemble(co) # DEBUG + show_asm = self.show_asm if not show_asm else show_asm + if self.show_asm in ('both', 'before'): + from xdis.bytecode import Bytecode + bytecode = Bytecode(co, self.opc) + for instr in bytecode.get_instructions(co): + print(instr._disassemble()) # Container for tokens tokens = [] @@ -60,6 +63,7 @@ class Scanner2(scan.Scanner): Token = self.Token # shortcut n = self.setup_code(co) + self.build_lines_data(co, n) self.build_prev_op(n) @@ -201,75 +205,13 @@ class Scanner2(scan.Scanner): tokens.append(Token(op_name, oparg, pattr, offset, linestart)) else: tokens.append(Token(replace[offset], oparg, pattr, offset, linestart)) - return tokens, customize - - def disassemble_native(self, co, classname=None, code_objects={}): - """ - Like disassemble3 but doesn't try to adjust any opcodes. - """ - - ## FIXME: DRY with disassemble - - # Container for tokens - tokens = [] - - customize = {} - Token = self.Token # shortcut - - n = self.setup_code(co) - self.build_lines_data(co, n) - - # self.lines contains (block,addrLastInstr) - if classname: - classname = '_' + classname.lstrip('_') + '__' - - def unmangle(name): - if name.startswith(classname) and name[-2:] != '__': - return name[len(classname) - 2:] - return name - - free = [ unmangle(name) for name in (co.co_cellvars + co.co_freevars) ] - names = [ unmangle(name) for name in co.co_names ] - varnames = [ unmangle(name) for name in co.co_varnames ] - else: - free = co.co_cellvars + co.co_freevars - names = co.co_names - varnames = co.co_varnames - - extended_arg = 0 - for offset in self.op_range(0, n): - op = self.code[offset] - op_name = self.opc.opname[op] - - oparg = None; pattr = None - if op >= self.opc.HAVE_ARGUMENT: - oparg = self.get_argument(offset) + extended_arg - extended_arg = 0 - if op == self.opc.EXTENDED_ARG: - extended_arg = oparg * scan.L65536 - continue - if op in self.opc.hasconst: - pattr = co.co_consts[oparg] - 
elif op in self.opc.hasname: - pattr = names[oparg] - elif op in self.opc.hasjrel: - pattr = repr(offset + 3 + oparg) - elif op in self.opc.hasjabs: - pattr = repr(oparg) - elif op in self.opc.haslocal: - pattr = varnames[oparg] - elif op in self.opc.hascompare: - pattr = self.opc.cmp_op[oparg] - elif op in self.opc.hasfree: - pattr = free[oparg] - - if offset in self.linestartoffsets: - linestart = self.linestartoffsets[offset] - else: - linestart = None - - tokens.append(Token(op_name, oparg, pattr, offset, linestart)) + pass pass + + if self.show_asm in ('both', 'after'): + for t in tokens: + print(t) + print() return tokens, customize def op_size(self, op): diff --git a/uncompyle6/scanners/scanner23.py b/uncompyle6/scanners/scanner23.py new file mode 100644 index 00000000..71b96620 --- /dev/null +++ b/uncompyle6/scanners/scanner23.py @@ -0,0 +1,709 @@ +# Copyright (c) 2016 by Rocky Bernstein +# Copyright (c) 2005 by Dan Pascu +# Copyright (c) 2000-2002 by hartmut Goebel +""" +Python 2.3 bytecode scanner + +This overlaps Python's 2.3's dis module, but it can be run from Python 3 and +other versions of Python. Also, we save token information for later +use in deparsing. +""" + +from uncompyle6.scanners.scanner2 import Scanner2 +from uncompyle6.scanner import L65536 + +class Scanner23(Scanner2): + def __init__(self, show_asm=None): + super(Scanner23, self).__init__(2.3, show_asm) + + def disassemble(self, co, code_objects={}, show_asm=None): + """ + Disassemble a code object, returning a list of 'Token'. + + The main part of this procedure is modelled after + dis.disassemble(). 
+ """ + + if self.show_asm in ('both', 'before'): + from xdis.bytecode import Bytecode + bytecode = Bytecode(co, self.opc) + for instr in bytecode.get_instructions(co): + print(instr._disassemble()) + + # Container for tokens + tokens = [] + + customize = {} + Token = self.Token # shortcut + + self.code = co.co_code + structures = self.find_structures(self.code) + #cf = self.find_jump_targets(code) + n = len(self.code) + i = 0 + extended_arg = 0 + free = None + while i < n: + offset = i + if structures.has_key(offset): + j = 0 + for elem in structures[offset]: + tokens.append(Token(elem, offset="%s_%d" % (offset, j))) + j += 1 + + c = self.code[i] + op = ord(c) + opname = self.opc.opname[op] + i += 1 + oparg = None; pattr = None + if op >= self.opc.HAVE_ARGUMENT: + oparg = ord(self.code[i]) + ord(self.code[i+1]) * 256 + extended_arg + extended_arg = 0 + i += 2 + if op == self.opc.EXTENDED_ARG: + extended_arg = oparg * L65536 + if op in self.opc.hasconst: + const = co.co_consts[oparg] + # We can't use inspect.iscode() because we may be + # using a different version of Python than the + # one that this was byte-compiled on. So the code + # types may mismatch. 
+ if hasattr(const, 'co_name'): + oparg = const + const = oparg + if const.co_name == '': + assert opname == 'LOAD_CONST' + opname = 'LOAD_LAMBDA' + # verify uses 'pattr' for comparison, since 'attr' + # now holds Code(const) and thus can not be used + # for comparison (todo: think about changing this) + # pattr = 'code_object @ 0x%x %s->%s' %\ + # (id(const), const.co_filename, const.co_name) + pattr = '' + else: + pattr = const + elif op in self.opc.hasname: + pattr = co.co_names[oparg] + elif op in self.opc.hasjrel: + pattr = repr(i + oparg) + elif op in self.opc.hasjabs: + pattr = repr(oparg) + elif op in self.opc.haslocal: + pattr = co.co_varnames[oparg] + elif op in self.opc.hascompare: + pattr = self.opc.cmp_op[oparg] + elif op in self.opc.hasfree: + if free is None: + free = co.co_cellvars + co.co_freevars + pattr = free[oparg] + + if opname == 'SET_LINENO': + continue + elif opname in ('BUILD_LIST', 'BUILD_TUPLE', 'BUILD_SLICE', + 'UNPACK_LIST', 'UNPACK_TUPLE', 'UNPACK_SEQUENCE', + 'MAKE_FUNCTION', 'CALL_FUNCTION', 'MAKE_CLOSURE', + 'CALL_FUNCTION_VAR', 'CALL_FUNCTION_KW', + 'CALL_FUNCTION_VAR_KW', 'DUP_TOPX', + ): + opname = '%s_%d' % (opname, oparg) + customize[opname] = oparg + + tokens.append(Token(opname, oparg, pattr, offset)) + pass + + if self.show_asm: + for t in tokens: + print(t) + print() + + return tokens, customize + + def __get_target(self, code, pos, op=None): + if op is None: + op = ord(code[pos]) + target = ord(code[pos+1]) + ord(code[pos+2]) * 256 + if op in self.self.opc.hasjrel: + target += pos + 3 + return target + + def __first_instr(self, code, start, end, instr, target=None, exact=True): + """ + Find the first in the block from start to end. + is any python bytecode instruction or a list of opcodes + If is an opcode with a target (like a jump), a target + destination can be specified which must match precisely if exact + is True, or if exact is False, the instruction which has a target + closest to will be returned. 
+ + Return index to it or None if not found. + """ + + assert(start>=0 and end in the block from start to end. + is any python bytecode instruction or a list of opcodes + If is an opcode with a target (like a jump), a target + destination can be specified which must match precisely if exact + is True, or if exact is False, the instruction which has a target + closest to will be returned. + + Return index to it or None if not found. + """ + + assert(start>=0 and end in the block from start to end. + is any python bytecode instruction or a list of opcodes + If is an opcode with a target (like a jump), a target + destination can be specified which must match precisely. + + Return a list with indexes to them or [] if none found. + """ + + assert(start>=0 and end= HAVE_ARGUMENT: + break + ops[pos] = op + opp[pos] = x + pos += 1 + x += 1 + if ops[0] == POP_TOP and ops[1] == END_FINALLY and opp[1] == end: + return jmp + if ops[0] == POP_TOP and ops[1] == DUP_TOP: + return jmp + if ops[0] == ops[1] == ops[2] == ops[3] == POP_TOP: + return jmp + start = jmp + 3 + return None + + def __list_comprehension(self, code, pos, op=None): + """ + Determine if there is a list comprehension structure starting at pos + """ + BUILD_LIST = self.opc.opmap['BUILD_LIST'] + DUP_TOP = self.opc.opmap['DUP_TOP'] + LOAD_ATTR = self.opc.opmap['LOAD_ATTR'] + if op is None: + op = ord(code[pos]) + if op != BUILD_LIST: + return 0 + try: + elems = ord(code[pos+1]) + ord(code[pos+2])*256 + codes = (op, elems, ord(code[pos+3]), ord(code[pos+4])) + except IndexError: + return 0 + return (codes==(BUILD_LIST, 0, DUP_TOP, LOAD_ATTR)) + + def __ignore_if(self, code, pos): + """ + Return true if this 'if' is to be ignored. 
+ """ + POP_TOP = self.opc.opmap['POP_TOP'] + COMPARE_OP = self.opc.opmap['COMPARE_OP'] + EXCEPT_MATCH = self.opc.copmap['exception match'] + + ## If that was added by a while loop + if pos in self.__ignored_ifs: + return 1 + + # Check if we can test only for POP_TOP for this -Dan + # Maybe need to be done as above (skip SET_LINENO's) + if (ord(code[pos-3])==COMPARE_OP and + (ord(code[pos-2]) + ord(code[pos-1])*256)==EXCEPT_MATCH and + ord(code[pos+3])==POP_TOP and + ord(code[pos+4])==POP_TOP and + ord(code[pos+5])==POP_TOP and + ord(code[pos+6])==POP_TOP): + return 1 ## Exception match + return 0 + + def __fix_parent(self, code, target, parent): + """Fix parent boundaries if needed""" + JUMP_ABSOLUTE = self.opc.opmap['JUMP_ABSOLUTE'] + start = parent['start'] + end = parent['end'] + + ## Map the second start point for 'while 1:' in python 2.3+ to start + try: target = self.__while1[target] + except: pass + if target >= start or end-start < 3 or target not in self.__loops: + return + if ord(code[end-3])==JUMP_ABSOLUTE: + cont_target = self.__get_target(code, end-3, JUMP_ABSOLUTE) + if target == cont_target: + parent['end'] = end-3 + + def __restrict_to_parent(self, target, parent): + """Restrict pos to parent boundaries.""" + if not (parent['start'] < target < parent['end']): + target = parent['end'] + return target + + def __detect_structure(self, code, pos, op=None): + """ + Detect structures and their boundaries to fix optimizied jumps + in python2.3+ + """ + + # TODO: check the struct boundaries more precisely -Dan + + SETUP_LOOP = self.opc.opmap['SETUP_LOOP'] + FOR_ITER = self.opc.opmap['FOR_ITER'] + GET_ITER = self.opc.opmap['GET_ITER'] + SETUP_EXCEPT = self.opc.opmap['SETUP_EXCEPT'] + JUMP_FORWARD = self.opc.opmap['JUMP_FORWARD'] + JUMP_ABSOLUTE = self.opc.opmap['JUMP_ABSOLUTE'] + JUMP_IF_FALSE = self.opc.opmap['JUMP_IF_FALSE'] + JUMP_IF_TRUE = self.opc.opmap['JUMP_IF_TRUE'] + END_FINALLY = self.opc.opmap['END_FINALLY'] + POP_TOP = self.opc.opmap['POP_TOP'] 
+ POP_BLOCK = self.opc.opmap['POP_BLOCK'] + try: SET_LINENO = self.opc.opmap['SET_LINENO'] + except: SET_LINENO = None + + # Ev remove this test and make op a mandatory argument -Dan + if op is None: + op = ord(code[pos]) + + ## Detect parent structure + parent = self.__structs[0] + start = parent['start'] + end = parent['end'] + for s in self.__structs: + if s['type'] == 'LOGIC_TEST': + continue ## logic tests are not structure containers + _start = s['start'] + _end = s['end'] + if (_start <= pos < _end) and (_start >= start and _end < end): + start = _start + end = _end + parent = s + + ## We need to know how many new structures were added in this run + origStructCount = len(self.__structs) + + if op == SETUP_LOOP: + start = pos+3 + # this is for python2.2. Maybe we can optimize and not call this for 2.3+ -Dan + while ord(code[start]) == SET_LINENO: + start += 3 + start_op = ord(code[start]) + while1 = False + if start_op in (JUMP_FORWARD, JUMP_ABSOLUTE): + ## This is a while 1 (has a particular structure) + start = self.__get_target(code, start, start_op) + start = self.__restrict_to_parent(start, parent) + self.__while1[pos+3] = start ## map between the 2 start points + while1 = True + if start_op == JUMP_ABSOLUTE and ord(code[pos+6])==JUMP_IF_FALSE: + # special `while 1: pass` in python2.3 + self.__fixed_jumps[pos+3] = start + target = self.__get_target(code, pos, op) + end = self.__restrict_to_parent(target, parent) + if target != end: + self.__fixed_jumps[pos] = end + jump_back = self.__last_instr(code, start, end, JUMP_ABSOLUTE, + start, False) + assert(jump_back is not None) + target = self.__get_target(code, jump_back, JUMP_ABSOLUTE) + i = target + while i < jump_back and ord(code[i])==SET_LINENO: + i += 3 + if ord(code[i]) in (FOR_ITER, GET_ITER): + loop_type = 'FOR' + else: + lookup = [JUMP_IF_FALSE, JUMP_IF_TRUE] + test = self.__first_instr(code, pos+3, jump_back, lookup, jump_back+3) + if test is None: + # this is a special while 1 structure in 
python 2.4 + while1 = True + else: + #assert(test is not None) + test_target = self.__get_target(code, test) + test_target = self.__restrict_to_parent(test_target, parent) + next = (ord(code[test_target]), ord(code[test_target+1])) + if next == (POP_TOP, POP_BLOCK): + self.__ignored_ifs.append(test) + else: + while1 = True + if while1 == True: + loop_type = 'WHILE1' + else: + loop_type = 'WHILE' + + self.__loops.append(target) + self.__structs.append({'type': loop_type, + 'start': target, + 'end': jump_back}) + self.__structs.append({'type': loop_type + '_ELSE', + 'start': jump_back+3, + 'end': end}) + elif self.__list_comprehension(code, pos, op): + get_iter = self.__first_instr(code, pos+7, end, GET_ITER) + for_iter = self.__first_instr(code, get_iter, end, FOR_ITER) + assert(get_iter is not None and for_iter is not None) + start = get_iter+1 + target = self.__get_target(code, for_iter, FOR_ITER) + end = self.__restrict_to_parent(target, parent) + jump_back = self.__last_instr(code, start, end, JUMP_ABSOLUTE, + start, False) + assert(jump_back is not None) + target = self.__get_target(code, jump_back, JUMP_ABSOLUTE) + start = self.__restrict_to_parent(target, parent) + self.__structs.append({'type': 'LIST_COMPREHENSION', + 'start': start, + 'end': jump_back}) + elif op == SETUP_EXCEPT: + start = pos+3 + target = self.__get_target(code, pos, op) + # this should be redundant as it can't be out of boundaries -Dan + # check if it can be removed + end = self.__restrict_to_parent(target, parent) + if target != end: + #print "!!!!found except target != end: %s %s" % (target, end) + self.__fixed_jumps[pos] = end + ## Add the try block + self.__structs.append({'type': 'TRY', + 'start': start, + 'end': end-4}) + ## Now isolate the except and else blocks + start = end + target = self.__get_target(code, start-3) + #self.__fix_parent(code, target, parent) + try_else_start = target + end = self.__restrict_to_parent(target, parent) + if target != end: + 
self.__fixed_jumps[start-3] = end + + end_finally = self.__last_instr(code, start, end, END_FINALLY) + assert(end_finally is not None) + lookup = [JUMP_ABSOLUTE, JUMP_FORWARD] + jump_end = self.__last_instr(code, start, end_finally, lookup) + assert(jump_end is not None) + + target = self.__get_target(code, jump_end) + if target == try_else_start: + end = end_finally+1 + else: + end = self.__restrict_to_parent(target, parent) + if target != end: + self.__fixed_jumps[jump_end] = end + + ## Add the try-else block + self.__structs.append({'type': 'TRY_ELSE', + 'start': end_finally+1, + 'end': end}) + ## Add the except blocks + i = start + while i < end_finally: + jmp = self.__next_except_jump(code, i, end_finally, target) + if jmp is None: + break + if i!=start and ord(code[i])==POP_TOP: + pos = i + 1 + else: + pos = i + self.__structs.append({'type': 'EXCEPT', + 'start': pos, + 'end': jmp}) + if target != end: + self.__fixed_jumps[jmp] = end + i = jmp+3 + elif op == JUMP_ABSOLUTE: + ## detect if we have a 'foo and bar and baz...' structure + ## that was optimized (thus the presence of JUMP_ABSOLUTE) + return # no longer needed. just return. remove this elif later -Dan + if pos in self.__fixed_jumps: + return ## Already marked + if parent['end'] - pos < 7: + return + next = (ord(code[pos+3]), ord(code[pos+6])) + if next != (JUMP_IF_FALSE, POP_TOP): + return + + end = self.__get_target(code, pos+3) + ifs = self.__all_instr(code, pos, end, JUMP_IF_FALSE, end) + + ## Test if all JUMP_IF_FALSE we have found belong to the + ## structure (may not be needed but it doesn't hurt) + count = len(ifs) + if count < 2: + return + for jif in ifs[1:]: + before = ord(code[jif-3]) + after = ord(code[jif+3]) + if (before not in (JUMP_FORWARD, JUMP_ABSOLUTE) or + after != POP_TOP): + return + + ## All tests passed. 
Perform fixes + self.__ignored_ifs.extend(ifs) + for i in range(count-1): + self.__fixed_jumps[ifs[i]-3] = ifs[i+1]-3 + elif op in (JUMP_IF_FALSE, JUMP_IF_TRUE): + if self.__ignore_if(code, pos): + return + start = pos+4 ## JUMP_IF_FALSE/TRUE + POP_TOP + target = self.__get_target(code, pos, op) + if parent['start'] <= target <= parent['end']: + if ord(code[target-3]) in (JUMP_ABSOLUTE, JUMP_FORWARD): + if_end = self.__get_target(code, target-3) + #self.__fix_parent(code, if_end, parent) + end = self.__restrict_to_parent(if_end, parent) + if ord(code[end-3]) == JUMP_ABSOLUTE: + else_end = self.__get_target(code, end-3) + if if_end == else_end and if_end in self.__loops: + end -= 3 ## skip the continue instruction + if if_end != end: + self.__fixed_jumps[target-3] = end + self.__structs.append({'type': 'IF_THEN', + 'start': start, + 'end': target-3}) + self.__structs.append({'type': 'IF_ELSE', + 'start': target+1, + 'end': end}) + else: + self.__structs.append({'type': 'LOGIC_TEST', + 'start': start, + 'end': target}) + + def find_jump_targets(self, code): + """ + Detect all offsets in a byte code which are jump targets. + + Return the list of offsets. + + This procedure is modelled after self.opc.findlables(), but here + for each target the number of jumps are counted. 
+ """ + HAVE_ARGUMENT = self.opc.HAVE_ARGUMENT + + hasjrel = self.opc.hasjrel + hasjabs = self.opc.hasjabs + + needFixing = (self.__pyversion >= 2.3) + + n = len(code) + self.__structs = [{'type': 'root', + 'start': 0, + 'end': n-1}] + self.__loops = [] ## All loop entry points + self.__while1 = {} ## 'while 1:' in python 2.3+ has another start point + self.__fixed_jumps = {} ## Map fixed jumps to their real destination + self.__ignored_ifs = [] ## JUMP_IF_XXXX's we should ignore + + targets = {} + i = 0 + while i < n: + op = ord(code[i]) + + if needFixing: + ## Determine structures and fix jumps for 2.3+ + self.__detect_structure(code, i, op) + + if op >= HAVE_ARGUMENT: + label = self.__fixed_jumps.get(i) + if label is None: + oparg = ord(code[i+1]) + ord(code[i+2]) * 256 + if op in hasjrel: + label = i + 3 + oparg + elif op in hasjabs: + # todo: absolute jumps + pass + if label is not None: + targets[label] = targets.get(label, 0) + 1 + i += 3 + else: + i += 1 + return targets + + def find_structures(self, code): + """ + Detect all structures in a byte code. + + Return a mapping from offset to a list of keywords that should + be inserted at that position. 
+ """ + HAVE_ARGUMENT = self.opc.HAVE_ARGUMENT + + n = len(code) + self.__structs = [{'type': 'root', + 'start': 0, + 'end': n-1}] + self.__loops = [] ## All loop entry points + self.__while1 = {} ## 'while 1:' in python 2.3+ has another start point + self.__fixed_jumps = {} ## Map fixed jumps to their real destination + self.__ignored_ifs = [] ## JUMP_IF_XXXX's we should ignore + + i = 0 + while i < n: + op = ord(code[i]) + if op >= HAVE_ARGUMENT: + i += 3 + else: + i += 1 + #from pprint import pprint + #print + #print "structures: ", + #pprint(self.__structs) + #print "loops: ", + #pprint(self.__loops) + #print "while1: ", + #pprint(self.__while1) + #print "fixed jumps: ", + #pprint(self.__fixed_jumps) + #print "ignored ifs: ", + #pprint(self.__ignored_ifs) + #print + points = {} + endpoints = {} + for s in self.__structs: + typ = s['type'] + start = s['start'] + end = s['end'] + if typ == 'root': + continue + ## startpoints of the outer structures must come first + ## endpoints of the inner structures must come first + points.setdefault(start, []).append("%s_START" % typ) + endpoints.setdefault(end, []).insert(0, "%s_END" % typ) + for k, v in endpoints.items(): + points.setdefault(k, []).extend(v) + #print "points: ", + #pprint(points) + #print + return points + +# __scanners = {} + +# def getscanner(version): +# if not __scanners.has_key(version): +# __scanners[version] = Scanner(version) +# return __scanners[version] + +if __name__ == "__main__": + from uncompyle6 import PYTHON_VERSION + if PYTHON_VERSION == 2.3: + import inspect + co = inspect.currentframe().f_code + tokens, customize = Scanner23().disassemble(co) + for t in tokens: + print(t.format()) + else: + print("Need to be Python 2.3 to demo; I am %s." 
% + PYTHON_VERSION) + + +# local variables: +# tab-width: 4 diff --git a/uncompyle6/scanners/scanner25.py b/uncompyle6/scanners/scanner25.py index e6ece85c..f135ab9f 100755 --- a/uncompyle6/scanners/scanner25.py +++ b/uncompyle6/scanners/scanner25.py @@ -23,8 +23,8 @@ JUMP_OPs = opcode_25.JUMP_OPs # The history is that 2.7 support is the cleanest, # then from that we got 2.6 and so on. class Scanner25(scan.Scanner26): - def __init__(self): - scan2.Scanner2.__init__(self, 2.5) + def __init__(self, show_asm): + scan2.Scanner2.__init__(self, 2.5, show_asm) self.stmt_opcodes = frozenset([ self.opc.SETUP_LOOP, self.opc.BREAK_LOOP, self.opc.SETUP_FINALLY, self.opc.END_FINALLY, diff --git a/uncompyle6/scanners/scanner26.py b/uncompyle6/scanners/scanner26.py index e3953bf7..6be08b5e 100755 --- a/uncompyle6/scanners/scanner26.py +++ b/uncompyle6/scanners/scanner26.py @@ -18,8 +18,8 @@ from xdis.opcodes import opcode_26 JUMP_OPs = opcode_26.JUMP_OPs class Scanner26(scan.Scanner2): - def __init__(self): - super(Scanner26, self).__init__(2.6) + def __init__(self, show_asm=False): + super(Scanner26, self).__init__(2.6, show_asm) self.stmt_opcodes = frozenset([ self.opc.SETUP_LOOP, self.opc.BREAK_LOOP, self.opc.SETUP_FINALLY, self.opc.END_FINALLY, @@ -65,7 +65,7 @@ class Scanner26(scan.Scanner2): return - def disassemble(self, co, classname=None, code_objects={}): + def disassemble(self, co, classname=None, code_objects={}, show_asm=None): ''' Disassemble a code object, returning a list of 'Token'. @@ -73,7 +73,17 @@ class Scanner26(scan.Scanner2): dis.disassemble(). 
''' - # import dis; dis.disassemble(co) # DEBUG + show_asm = self.show_asm if not show_asm else show_asm + if self.show_asm in ('both', 'before'): + from xdis.bytecode import Bytecode + bytecode = Bytecode(co, self.opc) + for instr in bytecode.get_instructions(co): + print(instr._disassemble()) + + # from xdis.bytecode import Bytecode + # bytecode = Bytecode(co, self.opc) + # for instr in bytecode.get_instructions(co): + # print(instr._disassemble()) # Container for tokens tokens = [] @@ -82,7 +92,8 @@ class Scanner26(scan.Scanner2): Token = self.Token # shortcut n = self.setup_code(co) - self.build_lines_data(co, n) + + self.build_lines_data(co, n-1) # linestarts contains block code adresses (addr,block) self.linestarts = list(findlinestarts(co)) @@ -247,9 +258,10 @@ class Scanner26(scan.Scanner2): pass pass - # Debug - # for t in tokens: - # print t + if self.show_asm: + for t in tokens: + print(t) + print() return tokens, customize def getOpcodeToDel(self, i): @@ -509,11 +521,8 @@ class Scanner26(scan.Scanner2): if op in self.pop_jump_if: target = self.get_argument(i) target += i + 3 - self.restructJump(i, target) - if self.op_hasArgument(op) and op not in self.opc.hasArgumentExtended: - i += 3 - else: i += 1 + i += self.op_size(op) i=0 while i < len(self.code): # we can't use op_range for the moment @@ -523,9 +532,17 @@ class Scanner26(scan.Scanner2): if self.code[target] == self.opc.JA: target = self.get_target(target) self.restructJump(i, target) - if self.op_hasArgument(op) and op not in self.opc.hasArgumentExtended: - i += 3 - else: i += 1 + i += self.op_size(op) + i=0 + # while i < len(self.code): # we can't use op_range for the moment + # op = self.code[i] + # name = self.opc.opname[op] + # if self.op_hasArgument(op): + # oparg = self.get_argument(i) + # print("%d %s %d" % (i, name, oparg)) + # else: + # print("%d %s" % (i, name)) + # i += self.op_size(op) def restructJump(self, pos, newTarget): if self.code[pos] not in self.opc.hasjabs + 
self.opc.hasjrel: @@ -577,8 +594,8 @@ class Scanner26(scan.Scanner2): if (jump_back and jump_back != self.prev[end] and code[jump_back + 3] in self.jump_forward): if (code[self.prev[end]] == self.opc.RETURN_VALUE - or code[self.prev[end]] == self.opc.POP_BLOCK - and code[self.prev[self.prev[end]]] == self.opc.RETURN_VALUE): + or (code[self.prev[end]] == self.opc.POP_BLOCK + and code[self.prev[self.prev[end]]] == self.opc.RETURN_VALUE)): jump_back = None if not jump_back: # loop suite ends in return. wtf right? jump_back = self.last_instr(start, end, self.opc.JA, start, False) @@ -595,7 +612,7 @@ class Scanner26(scan.Scanner2): else: if self.get_target(jump_back) >= next_line_byte: jump_back = self.last_instr(start, end, self.opc.JA, start, False) - if end > jump_back + 4 and code[end] in (self.opc.JF, self.opc.JA): + if end > jump_back + 4 and code[end] in self.jump_forward: if code[jump_back + 4] in (self.opc.JA, self.opc.JF): if self.get_target(jump_back+4) == self.get_target(end): self.fixed_jumps[pos] = jump_back+4 @@ -694,7 +711,9 @@ class Scanner26(scan.Scanner2): # is this an if and if op == self.opc.PJIF: match = self.rem_or(start, self.next_stmt[pos], self.opc.PJIF, target) - match = self.remove_mid_line_ifs(match) + ## We can't remove mid-line ifs because line structures have changed + ## from restructBytecode(). + ## match = self.remove_mid_line_ifs(match) if match: if (code[pre[rtarget]] in (self.opc.JF, self.opc.JA) and pre[rtarget] not in self.stmts @@ -796,9 +815,7 @@ if __name__ == "__main__": if PYTHON_VERSION == 2.6: import inspect co = inspect.currentframe().f_code - tokens, customize = Scanner26().disassemble(co) - for t in tokens: - print(t.format()) + tokens, customize = Scanner26(show_asm=True).disassemble(co) else: print("Need to be Python 2.6 to demo; I am %s." 
% PYTHON_VERSION) diff --git a/uncompyle6/scanners/scanner27.py b/uncompyle6/scanners/scanner27.py index d50e0d5d..415dd40f 100755 --- a/uncompyle6/scanners/scanner27.py +++ b/uncompyle6/scanners/scanner27.py @@ -17,8 +17,8 @@ from xdis.opcodes import opcode_27 JUMP_OPs = opcode_27.JUMP_OPs class Scanner27(Scanner2): - def __init__(self): - super(Scanner27, self).__init__(2.7) + def __init__(self, show_asm=False): + super(Scanner27, self).__init__(2.7, show_asm) # opcodes that start statements self.stmt_opcodes = frozenset([ diff --git a/uncompyle6/scanners/scanner3.py b/uncompyle6/scanners/scanner3.py index 4254df0b..b87afc06 100644 --- a/uncompyle6/scanners/scanner3.py +++ b/uncompyle6/scanners/scanner3.py @@ -40,10 +40,10 @@ import uncompyle6.scanner as scan class Scanner3(scan.Scanner): - def __init__(self, version): - super(Scanner3, self).__init__(version) + def __init__(self, version, show_asm=None): + super(Scanner3, self).__init__(version, show_asm) - def disassemble(self, co, classname=None, code_objects={}): + def disassemble(self, co, classname=None, code_objects={}, show_asm=None): """ Disassemble a Python 3 code object, returning a list of 'Token'. Various tranformations are made to assist the deparsing grammar. @@ -55,7 +55,11 @@ class Scanner3(scan.Scanner): dis.disassemble(). """ - # import dis; dis.disassemble(co) # DEBUG + show_asm = self.show_asm if not show_asm else show_asm + if self.show_asm in ('both', 'before'): + bytecode = Bytecode(co, self.opc) + for instr in bytecode.get_instructions(co): + print(instr._disassemble()) # Container for tokens tokens = [] @@ -176,32 +180,6 @@ class Scanner3(scan.Scanner): pass return tokens, {} - def disassemble_native(self, co, classname=None, code_objects={}): - """ - Like disassemble3 but doesn't try to adjust any opcodes. 
- """ - # Container for tokens - tokens = [] - - self.code = array('B', co.co_code) - - bytecode = Bytecode(co, self.opc) - - for inst in bytecode: - pattr = inst.argrepr - opname = inst.opname - tokens.append( - Token( - type_ = opname, - attr = inst.argval, - pattr = pattr, - offset = inst.offset, - linestart = inst.starts_line, - ) - ) - pass - return tokens, {} - def build_lines_data(self, code_obj): """ Generate various line-related helper data. diff --git a/uncompyle6/scanners/scanner32.py b/uncompyle6/scanners/scanner32.py index 123e5ad4..452070e9 100644 --- a/uncompyle6/scanners/scanner32.py +++ b/uncompyle6/scanners/scanner32.py @@ -8,16 +8,15 @@ scanner routine for Python 3. from __future__ import print_function -import xdis - # bytecode verification, verify(), uses JUMP_OPs from here -# JUMP_OPs = xdis.opcodes.opcode_32.JUMP_OPs +from xdis.opcodes import opcode_32 as opc +JUMP_OPs = map(lambda op: opc.opname[op], opc.hasjrel + opc.hasjabs) from uncompyle6.scanners.scanner3 import Scanner3 class Scanner32(Scanner3): - def __init__(self): - super(Scanner3, self).__init__(3.2) + def __init__(self, show_asm=None): + super(Scanner3, self).__init__(3.2, show_asm) return pass diff --git a/uncompyle6/scanners/scanner33.py b/uncompyle6/scanners/scanner33.py index 3bf04d28..a59614a6 100644 --- a/uncompyle6/scanners/scanner33.py +++ b/uncompyle6/scanners/scanner33.py @@ -8,16 +8,15 @@ scanner routine for Python 3. 
from __future__ import print_function -import xdis - # bytecode verification, verify(), uses JUMP_OPs from here -JUMP_OPs = xdis.opcodes.opcode_33.JUMP_OPs +from xdis.opcodes import opcode_33 as opc +JUMP_OPs = map(lambda op: opc.opname[op], opc.hasjrel + opc.hasjabs) from uncompyle6.scanners.scanner3 import Scanner3 class Scanner33(Scanner3): - def __init__(self): - super(Scanner3, self).__init__(3.3) + def __init__(self, show_asm=False): + super(Scanner3, self).__init__(3.3, show_asm) return pass diff --git a/uncompyle6/scanners/scanner34.py b/uncompyle6/scanners/scanner34.py index aa1f65b6..785d2e5e 100644 --- a/uncompyle6/scanners/scanner34.py +++ b/uncompyle6/scanners/scanner34.py @@ -17,8 +17,8 @@ JUMP_OPs = map(lambda op: opc.opname[op], opc.hasjrel + opc.hasjabs) from uncompyle6.scanners.scanner3 import Scanner3 class Scanner34(Scanner3): - def __init__(self): - super(Scanner3, self).__init__(3.4) + def __init__(self, show_asm=None): + super(Scanner3, self).__init__(3.4, show_asm) return pass diff --git a/uncompyle6/scanners/scanner35.py b/uncompyle6/scanners/scanner35.py index 6efc1281..31320fc5 100644 --- a/uncompyle6/scanners/scanner35.py +++ b/uncompyle6/scanners/scanner35.py @@ -8,17 +8,16 @@ scanner routine for Python 3. 
from __future__ import print_function -from xdis.opcodes import opcode_35 as opc - from uncompyle6.scanners.scanner3 import Scanner3 # bytecode verification, verify(), uses JUMP_OPs from here +from xdis.opcodes import opcode_35 as opc JUMP_OPs = map(lambda op: opc.opname[op], opc.hasjrel + opc.hasjabs) class Scanner35(Scanner3): - def __init__(self): - super(Scanner35, self).__init__(3.5) + def __init__(self, show_asm=None): + super(Scanner35, self).__init__(3.5, show_asm) return pass diff --git a/uncompyle6/semantics/pysource.py b/uncompyle6/semantics/pysource.py index 80e07c7c..563c15f9 100644 --- a/uncompyle6/semantics/pysource.py +++ b/uncompyle6/semantics/pysource.py @@ -345,6 +345,16 @@ TABLE_DIRECT = { 'kv2': ( '%c: %c', 1, 2 ), 'mapexpr': ( '{%[1]C}', (0, maxint, ', ') ), + ####################### + # Python 2.3 Additions + ####################### + + # Import style for 2.0-2.3 + 'importstmt20': ( '%|import %c\n', 1), + 'importstar20': ( '%|from %[1]{pattr} import *\n', ), + 'importfrom20': ( '%|from %[1]{pattr} import %c\n', 2 ), + 'importlist20': ( '%C', (0, maxint, ', ') ), + ####################### # Python 2.5 Additions ####################### @@ -526,7 +536,11 @@ class SourceWalker(GenericASTTraversal, object): self.classes = [] self.pending_newlines = 0 self.hide_internal = True + self.version = version + if 2.0 <= version <= 2.3: + TABLE_DIRECT['tryfinallystmt'] = ( + '%|try:\n%+%c%-%|finally:\n%+%c%-\n\n', 1, 4 ) return f = property(lambda s: s.params['f'], @@ -1658,7 +1672,6 @@ class SourceWalker(GenericASTTraversal, object): else: defparams = node[:args_node.attr] kw_args, annotate_args = (0, 0) - pos_args = args_node.attr pass if self.version > 3.0 and isLambda and iscode(node[-3].attr):