From ac45e5757c8c57af2c918c79ff4f6195268afec1 Mon Sep 17 00:00:00 2001 From: rocky Date: Sat, 9 Jul 2016 05:59:02 -0400 Subject: [PATCH] Redo Python 2.3 to be more like the rest --- test/Makefile | 2 +- uncompyle6/parsers/parse2.py | 2 +- uncompyle6/parsers/parse23.py | 494 +-------------------- uncompyle6/parsers/parse24.py | 2 +- uncompyle6/parsers/parse27.py | 6 +- uncompyle6/scanner.py | 6 +- uncompyle6/scanners/scanner23.py | 729 +------------------------------ 7 files changed, 37 insertions(+), 1204 deletions(-) diff --git a/test/Makefile b/test/Makefile index 0fb70067..277ee5c5 100644 --- a/test/Makefile +++ b/test/Makefile @@ -51,7 +51,7 @@ check-bytecode-3: #: Check deparsing bytecode that works running Python 2 and Python 3 check-bytecode: check-bytecode-3 - $(PYTHON) test_pythonlib.py --bytecode-2.4 --bytecode-2.5 --bytecode-2.6 --bytecode-2.7 + $(PYTHON) test_pythonlib.py --bytecode-2.3 --bytecode-2.4 --bytecode-2.5 --bytecode-2.6 --bytecode-2.7 #: Check deparsing Python 2.3 check-bytecode-2.3: diff --git a/uncompyle6/parsers/parse2.py b/uncompyle6/parsers/parse2.py index 2f2dc7db..0285d032 100644 --- a/uncompyle6/parsers/parse2.py +++ b/uncompyle6/parsers/parse2.py @@ -24,7 +24,7 @@ class Python2Parser(PythonParser): super(Python2Parser, self).__init__(AST, 'stmts', debug=debug_parser) self.new_rules = set() - def p_print(self, args): + def p_print2(self, args): ''' stmt ::= print_items_stmt stmt ::= print_nl diff --git a/uncompyle6/parsers/parse23.py b/uncompyle6/parsers/parse23.py index 336a484d..20d29832 100644 --- a/uncompyle6/parsers/parse23.py +++ b/uncompyle6/parsers/parse23.py @@ -4,499 +4,16 @@ # Copyright (c) 1999 John Aycock import string -from spark_parser import GenericASTBuilder, DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG -from uncompyle6.parsers.astnode import AST -from uncompyle6.parser import PythonParser, PythonParserSingle, nop_func +from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG +from uncompyle6.parser import 
PythonParserSingle +from uncompyle6.parsers.parse24 import Python24Parser -class Python23Parser(PythonParser): +class Python23Parser(Python24Parser): def __init__(self, debug_parser=PARSER_DEFAULT_DEBUG): - super(Python23Parser, self).__init__(AST, 'stmts', debug=debug_parser) + super(Python24Parser, self).__init__(debug_parser) self.customized = {} - # FIXME: A lot of the functions below overwrite what is in parse.py which - # have more rules. Probly that should be stripped down more instead. - - def p_funcdef(self, args): - ''' - stmt ::= funcdef - funcdef ::= mkfunc designator - load_closure ::= load_closure LOAD_CLOSURE - load_closure ::= LOAD_CLOSURE - ''' - - def p_list_comprehension(self, args): - ''' - expr ::= list_compr - list_compr ::= BUILD_LIST_0 DUP_TOP _load_attr - designator list_iter del_stmt - - list_iter ::= list_for - list_iter ::= list_if - list_iter ::= lc_body - - _load_attr ::= LOAD_ATTR - _load_attr ::= - - _lcfor ::= GET_ITER LIST_COMPREHENSION_START FOR_ITER - _lcfor ::= LOAD_CONST FOR_LOOP - _lcfor2 ::= GET_ITER FOR_ITER - _lcfor2 ::= LOAD_CONST FOR_LOOP - - list_for ::= expr _lcfor designator list_iter - LIST_COMPREHENSION_END JUMP_ABSOLUTE - - list_for ::= expr _lcfor2 designator list_iter - JUMP_ABSOLUTE - - list_if ::= expr condjmp IF_THEN_START list_iter - IF_THEN_END _jump POP_TOP IF_ELSE_START IF_ELSE_END - - lc_body ::= LOAD_NAME expr CALL_FUNCTION_1 POP_TOP - lc_body ::= LOAD_FAST expr CALL_FUNCTION_1 POP_TOP - lc_body ::= LOAD_NAME expr LIST_APPEND - lc_body ::= LOAD_FAST expr LIST_APPEND - ''' - - def p_augmented_assign(self, args): - ''' - stmt ::= augassign1 - stmt ::= augassign2 - augassign1 ::= expr expr inplace_op designator - augassign1 ::= expr expr inplace_op ROT_THREE STORE_SUBSCR - augassign1 ::= expr expr inplace_op ROT_TWO STORE_SLICE+0 - augassign1 ::= expr expr inplace_op ROT_THREE STORE_SLICE+1 - augassign1 ::= expr expr inplace_op ROT_THREE STORE_SLICE+2 - augassign1 ::= expr expr inplace_op ROT_FOUR 
STORE_SLICE+3 - augassign2 ::= expr DUP_TOP LOAD_ATTR expr - inplace_op ROT_TWO STORE_ATTR - - inplace_op ::= INPLACE_ADD - inplace_op ::= INPLACE_SUBTRACT - inplace_op ::= INPLACE_MULTIPLY - inplace_op ::= INPLACE_DIVIDE - inplace_op ::= INPLACE_TRUE_DIVIDE - inplace_op ::= INPLACE_FLOOR_DIVIDE - inplace_op ::= INPLACE_MODULO - inplace_op ::= INPLACE_POWER - inplace_op ::= INPLACE_LSHIFT - inplace_op ::= INPLACE_RSHIFT - inplace_op ::= INPLACE_AND - inplace_op ::= INPLACE_XOR - inplace_op ::= INPLACE_OR - ''' - - def p_assign(self, args): - ''' - stmt ::= assign - assign ::= expr DUP_TOP designList - assign ::= expr designator - ''' - - def p_print(self, args): - ''' - stmt ::= print_stmt - stmt ::= print_stmt_nl - stmt ::= print_nl_stmt - print_stmt ::= expr PRINT_ITEM - print_nl_stmt ::= PRINT_NEWLINE - print_stmt_nl ::= print_stmt print_nl_stmt - ''' - - def p_print_to(self, args): - ''' - stmt ::= print_to - stmt ::= print_to_nl - stmt ::= print_nl_to - print_to ::= expr print_to_items POP_TOP - print_to_nl ::= expr print_to_items PRINT_NEWLINE_TO - print_nl_to ::= expr PRINT_NEWLINE_TO - print_to_items ::= print_to_items print_to_item - print_to_items ::= print_to_item - print_to_item ::= DUP_TOP expr ROT_TWO PRINT_ITEM_TO - ''' - # expr print_to* POP_TOP - # expr { print_to* } PRINT_NEWLINE_TO - - def p_import15(self, args): - ''' - stmt ::= importstmt - stmt ::= importfrom - - importstmt ::= IMPORT_NAME STORE_FAST - importstmt ::= IMPORT_NAME STORE_NAME - - importfrom ::= IMPORT_NAME importlist POP_TOP - importlist ::= importlist IMPORT_FROM - importlist ::= IMPORT_FROM - ''' - - # Python 2.0 - 2.3 imports - def p_import20_23(self, args): - ''' - stmt ::= importstmt20 - stmt ::= importfrom20 - stmt ::= importstar20 - - importstmt20 ::= LOAD_CONST import_as - importstar20 ::= LOAD_CONST IMPORT_NAME IMPORT_STAR - - importfrom20 ::= LOAD_CONST IMPORT_NAME importlist20 POP_TOP - importlist20 ::= importlist20 import_as - importlist20 ::= import_as - import_as 
::= IMPORT_NAME designator - import_as ::= IMPORT_NAME LOAD_ATTR designator - import_as ::= IMPORT_FROM designator - ''' - - def p_grammar(self, args): - ''' - stmts ::= stmts stmt - stmts ::= stmt - - stmts_opt ::= stmts - stmts_opt ::= passstmt - passstmt ::= - - stmt ::= classdef - stmt ::= call_stmt - call_stmt ::= expr POP_TOP - - stmt ::= return_stmt - return_stmt ::= expr RETURN_VALUE - - stmt ::= yield_stmt - yield_stmt ::= expr YIELD_STMT - yield_stmt ::= expr YIELD_VALUE - - stmt ::= break_stmt - break_stmt ::= BREAK_LOOP - - stmt ::= continue_stmt - continue_stmt ::= JUMP_ABSOLUTE - continue_stmt ::= CONTINUE_LOOP - - stmt ::= raise_stmt - raise_stmt ::= exprlist RAISE_VARARGS - raise_stmt ::= RAISE_VARARGS - - stmt ::= exec_stmt - exec_stmt ::= expr exprlist DUP_TOP EXEC_STMT - exec_stmt ::= expr exprlist EXEC_STMT - - stmt ::= assert - stmt ::= assert2 - stmt ::= assert3 - stmt ::= assert4 - stmt ::= ifstmt - stmt ::= ifelsestmt - stmt ::= whilestmt - stmt ::= while1stmt - stmt ::= while12stmt - stmt ::= whileelsestmt - stmt ::= while1elsestmt - stmt ::= while12elsestmt - stmt ::= forstmt - stmt ::= forelsestmt - stmt ::= trystmt - stmt ::= tryfinallystmt - - stmt ::= del_stmt - del_stmt ::= DELETE_FAST - del_stmt ::= DELETE_NAME - del_stmt ::= DELETE_GLOBAL - del_stmt ::= expr DELETE_SLICE+0 - del_stmt ::= expr expr DELETE_SLICE+1 - del_stmt ::= expr expr DELETE_SLICE+2 - del_stmt ::= expr expr expr DELETE_SLICE+3 - del_stmt ::= delete_subscr - delete_subscr ::= expr expr DELETE_SUBSCR - del_stmt ::= expr DELETE_ATTR - - kwarg ::= LOAD_CONST expr - - classdef ::= LOAD_CONST expr mkfunc - CALL_FUNCTION_0 BUILD_CLASS designator - - condjmp ::= JUMP_IF_FALSE POP_TOP - condjmp ::= JUMP_IF_TRUE POP_TOP - - assert ::= expr JUMP_IF_FALSE POP_TOP - LOGIC_TEST_START expr JUMP_IF_TRUE POP_TOP - LOGIC_TEST_START LOAD_GLOBAL RAISE_VARARGS - LOGIC_TEST_END LOGIC_TEST_END POP_TOP - assert2 ::= expr JUMP_IF_FALSE POP_TOP - LOGIC_TEST_START expr JUMP_IF_TRUE POP_TOP 
- LOGIC_TEST_START LOAD_GLOBAL expr RAISE_VARARGS - LOGIC_TEST_END LOGIC_TEST_END POP_TOP - assert3 ::= expr JUMP_IF_TRUE POP_TOP - LOGIC_TEST_START LOAD_GLOBAL RAISE_VARARGS - LOGIC_TEST_END POP_TOP - assert4 ::= expr JUMP_IF_TRUE POP_TOP - LOGIC_TEST_START LOAD_GLOBAL expr RAISE_VARARGS - LOGIC_TEST_END POP_TOP - - _jump ::= JUMP_ABSOLUTE - _jump ::= JUMP_FORWARD - - ifstmt ::= expr condjmp - IF_THEN_START stmts_opt IF_THEN_END - _jump POP_TOP IF_ELSE_START IF_ELSE_END - - ifelsestmt ::= expr condjmp - IF_THEN_START stmts_opt IF_THEN_END - _jump POP_TOP IF_ELSE_START stmts IF_ELSE_END - - trystmt ::= SETUP_EXCEPT TRY_START stmts_opt - TRY_END POP_BLOCK _jump - except_stmt - - try_end ::= END_FINALLY TRY_ELSE_START TRY_ELSE_END - try_end ::= except_else - except_else ::= END_FINALLY TRY_ELSE_START stmts TRY_ELSE_END - - except_stmt ::= except_stmt except_cond - except_stmt ::= except_conds try_end - except_stmt ::= except try_end - except_stmt ::= try_end - - except_conds ::= except_conds except_cond - except_conds ::= - - except_cond ::= except_cond1 - except_cond ::= except_cond2 - except_cond1 ::= EXCEPT_START DUP_TOP expr COMPARE_OP - JUMP_IF_FALSE - POP_TOP POP_TOP POP_TOP POP_TOP - stmts_opt EXCEPT_END _jump POP_TOP - except_cond2 ::= EXCEPT_START DUP_TOP expr COMPARE_OP - JUMP_IF_FALSE - POP_TOP POP_TOP designator POP_TOP - stmts_opt EXCEPT_END _jump POP_TOP - except ::= EXCEPT_START POP_TOP POP_TOP POP_TOP - stmts_opt EXCEPT_END _jump - - tryfinallystmt ::= SETUP_FINALLY stmts_opt - POP_BLOCK LOAD_CONST - stmts_opt END_FINALLY - - _while1test ::= _jump JUMP_IF_FALSE POP_TOP - _while1test ::= - - whilestmt ::= SETUP_LOOP WHILE_START - expr condjmp - stmts_opt WHILE_END JUMP_ABSOLUTE - WHILE_ELSE_START POP_TOP POP_BLOCK WHILE_ELSE_END - - while1stmt ::= SETUP_LOOP _while1test WHILE1_START - stmts_opt WHILE1_END JUMP_ABSOLUTE - WHILE1_ELSE_START POP_TOP POP_BLOCK WHILE1_ELSE_END - - while12stmt ::= SETUP_LOOP WHILE1_START - _jump JUMP_IF_FALSE POP_TOP - 
stmts_opt WHILE1_END JUMP_ABSOLUTE - WHILE1_ELSE_START POP_TOP POP_BLOCK WHILE1_ELSE_END - - whileelsestmt ::= SETUP_LOOP WHILE_START - expr condjmp - stmts_opt WHILE_END JUMP_ABSOLUTE - WHILE_ELSE_START POP_TOP POP_BLOCK - stmts WHILE_ELSE_END - - while1elsestmt ::= SETUP_LOOP _while1test WHILE1_START - stmts_opt WHILE1_END JUMP_ABSOLUTE - WHILE1_ELSE_START POP_TOP POP_BLOCK - stmts WHILE1_ELSE_END - - while12elsestmt ::= SETUP_LOOP WHILE1_START - _jump JUMP_IF_FALSE POP_TOP - stmts_opt WHILE1_END JUMP_ABSOLUTE - WHILE1_ELSE_START POP_TOP POP_BLOCK - stmts WHILE1_ELSE_END - - _for ::= GET_ITER FOR_START FOR_ITER - _for ::= LOAD_CONST FOR_LOOP - - forstmt ::= SETUP_LOOP expr _for designator - stmts_opt FOR_END JUMP_ABSOLUTE - FOR_ELSE_START POP_BLOCK FOR_ELSE_END - forelsestmt ::= SETUP_LOOP expr _for designator - stmts_opt FOR_END JUMP_ABSOLUTE - FOR_ELSE_START POP_BLOCK stmts FOR_ELSE_END - - ''' - - def p_expr(self, args): - ''' - expr ::= load_closure mklambda - expr ::= mklambda - expr ::= SET_LINENO - expr ::= LOAD_FAST - expr ::= LOAD_NAME - expr ::= LOAD_CONST - expr ::= LOAD_GLOBAL - expr ::= LOAD_DEREF - expr ::= LOAD_LOCALS - expr ::= expr LOAD_ATTR - expr ::= binary_expr - expr ::= build_list - - binary_expr ::= expr expr binary_op - binary_op ::= BINARY_ADD - binary_op ::= BINARY_SUBTRACT - binary_op ::= BINARY_MULTIPLY - binary_op ::= BINARY_DIVIDE - binary_op ::= BINARY_TRUE_DIVIDE - binary_op ::= BINARY_FLOOR_DIVIDE - binary_op ::= BINARY_MODULO - binary_op ::= BINARY_LSHIFT - binary_op ::= BINARY_RSHIFT - binary_op ::= BINARY_AND - binary_op ::= BINARY_OR - binary_op ::= BINARY_XOR - binary_op ::= BINARY_POWER - - expr ::= binary_subscr - binary_subscr ::= expr expr BINARY_SUBSCR - expr ::= expr expr DUP_TOPX_2 BINARY_SUBSCR - expr ::= cmp - expr ::= expr UNARY_POSITIVE - expr ::= expr UNARY_NEGATIVE - expr ::= expr UNARY_CONVERT - expr ::= expr UNARY_INVERT - expr ::= expr UNARY_NOT - expr ::= mapexpr - expr ::= expr SLICE+0 - expr ::= expr expr 
SLICE+1 - expr ::= expr expr SLICE+2 - expr ::= expr expr expr SLICE+3 - expr ::= expr DUP_TOP SLICE+0 - expr ::= expr expr DUP_TOPX_2 SLICE+1 - expr ::= expr expr DUP_TOPX_2 SLICE+2 - expr ::= expr expr expr DUP_TOPX_3 SLICE+3 - expr ::= and - expr ::= and2 - expr ::= or - or ::= expr JUMP_IF_TRUE POP_TOP LOGIC_TEST_START expr LOGIC_TEST_END - and ::= expr JUMP_IF_FALSE POP_TOP LOGIC_TEST_START expr LOGIC_TEST_END - and2 ::= _jump JUMP_IF_FALSE POP_TOP LOGIC_TEST_START expr LOGIC_TEST_END - - cmp ::= cmp_list - cmp ::= compare - compare ::= expr expr COMPARE_OP - cmp_list ::= expr cmp_list1 ROT_TWO IF_ELSE_START POP_TOP - IF_ELSE_END - cmp_list1 ::= expr DUP_TOP ROT_THREE - COMPARE_OP JUMP_IF_FALSE POP_TOP - cmp_list1 - cmp_list1 ::= expr DUP_TOP ROT_THREE - COMPARE_OP JUMP_IF_FALSE POP_TOP - IF_THEN_START cmp_list1 - cmp_list1 ::= expr DUP_TOP ROT_THREE - COMPARE_OP JUMP_IF_FALSE POP_TOP - IF_THEN_START cmp_list2 - cmp_list1 ::= expr DUP_TOP ROT_THREE - COMPARE_OP JUMP_IF_FALSE POP_TOP - cmp_list2 - cmp_list2 ::= expr COMPARE_OP IF_THEN_END JUMP_FORWARD - mapexpr ::= BUILD_MAP kvlist - - kvlist ::= kvlist kv - kvlist ::= kvlist kv2 - kvlist ::= - - kv ::= DUP_TOP expr ROT_TWO expr STORE_SUBSCR - kv2 ::= DUP_TOP expr expr ROT_THREE STORE_SUBSCR - - exprlist ::= exprlist expr - exprlist ::= expr - ''' - - def nonterminal(self, nt, args): - collect = ('stmts', 'exprlist', 'kvlist') - - if nt in collect and len(args) > 1: - # - # Collect iterated thingies together. - # - rv = args[0] - rv.append(args[1]) - else: - rv = GenericASTBuilder.nonterminal(self, nt, args) - return rv - - def __ambiguity(self, children): - # only for debugging! 
to be removed hG/2000-10-15 - print children - return GenericASTBuilder.ambiguity(self, children) - - def resolve(self, list): - if len(list) == 2 and 'funcdef' in list and 'assign' in list: - return 'funcdef' - #print >> sys.stderr, 'resolve', str(list) - return GenericASTBuilder.resolve(self, list) - - def add_custom_rules(self, tokens, customize): - """ - Special handling for opcodes that take a variable number - of arguments -- we add a new rule for each: - - expr ::= {expr}^n BUILD_LIST_n - expr ::= {expr}^n BUILD_TUPLE_n - expr ::= {expr}^n BUILD_SLICE_n - unpack_list ::= UNPACK_LIST {expr}^n - unpack ::= UNPACK_TUPLE {expr}^n - unpack ::= UNPACK_SEQEUENE {expr}^n - mkfunc ::= {expr}^n LOAD_CONST MAKE_FUNCTION_n - mkfunc ::= {expr}^n load_closure LOAD_CONST MAKE_FUNCTION_n - expr ::= expr {expr}^n CALL_FUNCTION_n - expr ::= expr {expr}^n CALL_FUNCTION_VAR_n POP_TOP - expr ::= expr {expr}^n CALL_FUNCTION_VAR_KW_n POP_TOP - expr ::= expr {expr}^n CALL_FUNCTION_KW_n POP_TOP - """ - for k, v in customize.items(): - # avoid adding the same rule twice to this parser - if self.customized.has_key(k): - continue - self.customized[k] = None - - #nop_func = lambda self, args: None - op = k[:string.rfind(k, '_')] - if op in ('BUILD_LIST', 'BUILD_TUPLE'): - rule = 'build_list ::= ' + 'expr '*v + k - elif op == 'BUILD_SLICE': - rule = 'expr ::= ' + 'expr '*v + k - elif op in ('UNPACK_TUPLE', 'UNPACK_SEQUENCE'): - rule = 'unpack ::= ' + k + ' designator'*v - elif op == 'UNPACK_LIST': - rule = 'unpack_list ::= ' + k + ' designator'*v - elif op == 'DUP_TOPX': - # no need to add a rule - continue - #rule = 'dup_topx ::= ' + 'expr '*v + k - elif op == 'MAKE_FUNCTION': - self.addRule('mklambda ::= %s LOAD_LAMBDA %s' % - ('expr '*v, k), nop_func) - rule = 'mkfunc ::= %s LOAD_CONST %s' % ('expr '*v, k) - elif op == 'MAKE_CLOSURE': - self.addRule('mklambda ::= %s load_closure LOAD_LAMBDA %s' % - ('expr '*v, k), nop_func) - rule = 'mkfunc ::= %s load_closure LOAD_CONST %s' % ('expr 
'*v, k) - elif op in ('CALL_FUNCTION', 'CALL_FUNCTION_VAR', - 'CALL_FUNCTION_VAR_KW', 'CALL_FUNCTION_KW'): - na = (v & 0xff) # positional parameters - nk = (v >> 8) & 0xff # keyword parameters - # number of apply equiv arguments: - nak = ( len(op)-len('CALL_FUNCTION') ) / 3 - rule = 'expr ::= expr ' + 'expr '*na + 'kwarg '*nk \ - + 'expr ' * nak + k - else: - raise 'unknown customize token %s' % k - self.addRule(rule, nop_func) - return - pass - - class Python23ParserSingle(Python23Parser, PythonParserSingle): pass @@ -504,6 +21,7 @@ if __name__ == '__main__': # Check grammar p = Python23Parser() p.checkGrammar() + p.dumpGrammar() # local variables: # tab-width: 4 diff --git a/uncompyle6/parsers/parse24.py b/uncompyle6/parsers/parse24.py index b1a0d479..579fe4ce 100644 --- a/uncompyle6/parsers/parse24.py +++ b/uncompyle6/parsers/parse24.py @@ -36,7 +36,7 @@ class Python24Parser(Python25Parser): gen_comp_body ::= expr YIELD_VALUE ''' -class Python24ParserSingle(Python25Parser, PythonParserSingle): +class Python24ParserSingle(Python24Parser, PythonParserSingle): pass if __name__ == '__main__': diff --git a/uncompyle6/parsers/parse27.py b/uncompyle6/parsers/parse27.py index 7bb94890..29168671 100644 --- a/uncompyle6/parsers/parse27.py +++ b/uncompyle6/parsers/parse27.py @@ -1,5 +1,9 @@ -from uncompyle6.parser import PythonParserSingle +# Copyright (c) 2016 Rocky Bernstein +# Copyright (c) 2005 by Dan Pascu +# Copyright (c) 2000-2002 by hartmut Goebel + from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG +from uncompyle6.parser import PythonParserSingle from uncompyle6.parsers.parse2 import Python2Parser class Python27Parser(Python2Parser): diff --git a/uncompyle6/scanner.py b/uncompyle6/scanner.py index d10ad5d8..2cfe06e3 100755 --- a/uncompyle6/scanner.py +++ b/uncompyle6/scanner.py @@ -22,11 +22,7 @@ from uncompyle6 import PYTHON3 from uncompyle6.scanners.tok import Token # The byte code versions we support -if PYTHON3: - # Need to work out Python 2.3. 
ord's in PYTHON3 - PYTHON_VERSIONS = (2.4, 2.5, 2.6, 2.7, 3.2, 3.3, 3.4, 3.5) -else: - PYTHON_VERSIONS = (2.3, 2.4, 2.5, 2.6, 2.7, 3.2, 3.3, 3.4, 3.5) +PYTHON_VERSIONS = (2.3, 2.4, 2.5, 2.6, 2.7, 3.2, 3.3, 3.4, 3.5) # FIXME: DRY if PYTHON3: diff --git a/uncompyle6/scanners/scanner23.py b/uncompyle6/scanners/scanner23.py index 463c83d6..0c59b29d 100644 --- a/uncompyle6/scanners/scanner23.py +++ b/uncompyle6/scanners/scanner23.py @@ -1,712 +1,27 @@ # Copyright (c) 2016 by Rocky Bernstein -# Copyright (c) 2005 by Dan Pascu -# Copyright (c) 2000-2002 by hartmut Goebel """ -Python 2.3 bytecode scanner +Python 2.4 bytecode scanner/deparser -This overlaps Python's 2.3's dis module, but it can be run from Python 3 and -other versions of Python. Also, we save token information for later -use in deparsing. +This overlaps Python's 2.4's dis module, but it can be run from +Python 3 and other versions of Python. Also, we save token +information for later use in deparsing. """ -from uncompyle6.scanners.scanner2 import Scanner2 -from uncompyle6.scanner import L65536 - -class Scanner23(Scanner2): - def __init__(self, show_asm=None): - super(Scanner23, self).__init__(2.3, show_asm) - # Python 2.7 has POP_JUMP_IF_{TRUE,FALSE}_OR_POP but < 2.7 doesn't - # Add an empty set make processing more uniform. - self.pop_jump_if_or_pop = frozenset([]) - - def disassemble(self, co, code_objects={}, show_asm=None): - """ - Disassemble a code object, returning a list of 'Token'. - - The main part of this procedure is modelled after - dis.disassemble(). 
- """ - - if self.show_asm in ('both', 'before'): - from xdis.bytecode import Bytecode - bytecode = Bytecode(co, self.opc) - for instr in bytecode.get_instructions(co): - print(instr._disassemble()) - - # Container for tokens - tokens = [] - - customize = {} - Token = self.Token # shortcut - - self.code = co.co_code - structures = self.find_structures(self.code) - #cf = self.find_jump_targets(code) - n = len(self.code) - i = 0 - extended_arg = 0 - free = None - while i < n: - offset = i - if structures.has_key(offset): - j = 0 - for elem in structures[offset]: - tokens.append(Token(elem, offset="%s_%d" % (offset, j))) - j += 1 - - c = self.code[i] - op = ord(c) - opname = self.opc.opname[op] - i += 1 - oparg = None; pattr = None - if op >= self.opc.HAVE_ARGUMENT: - oparg = ord(self.code[i]) + ord(self.code[i+1]) * 256 + extended_arg - extended_arg = 0 - i += 2 - if op == self.opc.EXTENDED_ARG: - extended_arg = oparg * L65536 - if op in self.opc.hasconst: - const = co.co_consts[oparg] - # We can't use inspect.iscode() because we may be - # using a different version of Python than the - # one that this was byte-compiled on. So the code - # types may mismatch. 
- if hasattr(const, 'co_name'): - oparg = const - const = oparg - if const.co_name == '': - assert opname == 'LOAD_CONST' - opname = 'LOAD_LAMBDA' - # verify uses 'pattr' for comparison, since 'attr' - # now holds Code(const) and thus can not be used - # for comparison (todo: think about changing this) - # pattr = 'code_object @ 0x%x %s->%s' %\ - # (id(const), const.co_filename, const.co_name) - pattr = '' - else: - pattr = const - elif op in self.opc.hasname: - pattr = co.co_names[oparg] - elif op in self.opc.hasjrel: - pattr = repr(i + oparg) - elif op in self.opc.hasjabs: - pattr = repr(oparg) - elif op in self.opc.haslocal: - pattr = co.co_varnames[oparg] - elif op in self.opc.hascompare: - pattr = self.opc.cmp_op[oparg] - elif op in self.opc.hasfree: - if free is None: - free = co.co_cellvars + co.co_freevars - pattr = free[oparg] - - if opname == 'SET_LINENO': - continue - elif opname in ('BUILD_LIST', 'BUILD_TUPLE', 'BUILD_SLICE', - 'UNPACK_LIST', 'UNPACK_TUPLE', 'UNPACK_SEQUENCE', - 'MAKE_FUNCTION', 'CALL_FUNCTION', 'MAKE_CLOSURE', - 'CALL_FUNCTION_VAR', 'CALL_FUNCTION_KW', - 'CALL_FUNCTION_VAR_KW', 'DUP_TOPX', - ): - opname = '%s_%d' % (opname, oparg) - customize[opname] = oparg - - tokens.append(Token(opname, oparg, pattr, offset)) - pass - - if self.show_asm: - for t in tokens: - print(t) - print() - - return tokens, customize - - def __get_target(self, code, pos, op=None): - if op is None: - op = ord(code[pos]) - target = ord(code[pos+1]) + ord(code[pos+2]) * 256 - if op in self.self.opc.hasjrel: - target += pos + 3 - return target - - def __first_instr(self, code, start, end, instr, target=None, exact=True): - """ - Find the first in the block from start to end. - is any python bytecode instruction or a list of opcodes - If is an opcode with a target (like a jump), a target - destination can be specified which must match precisely if exact - is True, or if exact is False, the instruction which has a target - closest to will be returned. 
- - Return index to it or None if not found. - """ - - assert(start>=0 and end in the block from start to end. - is any python bytecode instruction or a list of opcodes - If is an opcode with a target (like a jump), a target - destination can be specified which must match precisely if exact - is True, or if exact is False, the instruction which has a target - closest to will be returned. - - Return index to it or None if not found. - """ - - assert(start>=0 and end in the block from start to end. - is any python bytecode instruction or a list of opcodes - If is an opcode with a target (like a jump), a target - destination can be specified which must match precisely. - - Return a list with indexes to them or [] if none found. - """ - - assert(start>=0 and end= HAVE_ARGUMENT: - break - ops[pos] = op - opp[pos] = x - pos += 1 - x += 1 - if ops[0] == POP_TOP and ops[1] == END_FINALLY and opp[1] == end: - return jmp - if ops[0] == POP_TOP and ops[1] == DUP_TOP: - return jmp - if ops[0] == ops[1] == ops[2] == ops[3] == POP_TOP: - return jmp - start = jmp + 3 - return None - - def __list_comprehension(self, code, pos, op=None): - """ - Determine if there is a list comprehension structure starting at pos - """ - BUILD_LIST = self.opc.opmap['BUILD_LIST'] - DUP_TOP = self.opc.opmap['DUP_TOP'] - LOAD_ATTR = self.opc.opmap['LOAD_ATTR'] - if op is None: - op = ord(code[pos]) - if op != BUILD_LIST: - return 0 - try: - elems = ord(code[pos+1]) + ord(code[pos+2])*256 - codes = (op, elems, ord(code[pos+3]), ord(code[pos+4])) - except IndexError: - return 0 - return (codes==(BUILD_LIST, 0, DUP_TOP, LOAD_ATTR)) - - def __ignore_if(self, code, pos): - """ - Return true if this 'if' is to be ignored. 
- """ - POP_TOP = self.opc.opmap['POP_TOP'] - COMPARE_OP = self.opc.opmap['COMPARE_OP'] - EXCEPT_MATCH = self.opc.copmap['exception match'] - - ## If that was added by a while loop - if pos in self.__ignored_ifs: - return 1 - - # Check if we can test only for POP_TOP for this -Dan - # Maybe need to be done as above (skip SET_LINENO's) - if (ord(code[pos-3])==COMPARE_OP and - (ord(code[pos-2]) + ord(code[pos-1])*256)==EXCEPT_MATCH and - ord(code[pos+3])==POP_TOP and - ord(code[pos+4])==POP_TOP and - ord(code[pos+5])==POP_TOP and - ord(code[pos+6])==POP_TOP): - return 1 ## Exception match - return 0 - - def __fix_parent(self, code, target, parent): - """Fix parent boundaries if needed""" - JUMP_ABSOLUTE = self.opc.opmap['JUMP_ABSOLUTE'] - start = parent['start'] - end = parent['end'] - - ## Map the second start point for 'while 1:' in python 2.3+ to start - try: target = self.__while1[target] - except: pass - if target >= start or end-start < 3 or target not in self.__loops: - return - if ord(code[end-3])==JUMP_ABSOLUTE: - cont_target = self.__get_target(code, end-3, JUMP_ABSOLUTE) - if target == cont_target: - parent['end'] = end-3 - - def __restrict_to_parent(self, target, parent): - """Restrict pos to parent boundaries.""" - if not (parent['start'] < target < parent['end']): - target = parent['end'] - return target - - def __detect_structure(self, code, pos, op=None): - """ - Detect structures and their boundaries to fix optimizied jumps - in python2.3+ - """ - - # TODO: check the struct boundaries more precisely -Dan - - SETUP_LOOP = self.opc.opmap['SETUP_LOOP'] - FOR_ITER = self.opc.opmap['FOR_ITER'] - GET_ITER = self.opc.opmap['GET_ITER'] - SETUP_EXCEPT = self.opc.opmap['SETUP_EXCEPT'] - JUMP_FORWARD = self.opc.opmap['JUMP_FORWARD'] - JUMP_ABSOLUTE = self.opc.opmap['JUMP_ABSOLUTE'] - JUMP_IF_FALSE = self.opc.opmap['JUMP_IF_FALSE'] - JUMP_IF_TRUE = self.opc.opmap['JUMP_IF_TRUE'] - END_FINALLY = self.opc.opmap['END_FINALLY'] - POP_TOP = self.opc.opmap['POP_TOP'] 
- POP_BLOCK = self.opc.opmap['POP_BLOCK'] - try: SET_LINENO = self.opc.opmap['SET_LINENO'] - except: SET_LINENO = None - - # Ev remove this test and make op a mandatory argument -Dan - if op is None: - op = ord(code[pos]) - - ## Detect parent structure - parent = self.__structs[0] - start = parent['start'] - end = parent['end'] - for s in self.__structs: - if s['type'] == 'LOGIC_TEST': - continue ## logic tests are not structure containers - _start = s['start'] - _end = s['end'] - if (_start <= pos < _end) and (_start >= start and _end < end): - start = _start - end = _end - parent = s - - ## We need to know how many new structures were added in this run - origStructCount = len(self.__structs) - - if op == SETUP_LOOP: - start = pos+3 - # this is for python2.2. Maybe we can optimize and not call this for 2.3+ -Dan - while ord(code[start]) == SET_LINENO: - start += 3 - start_op = ord(code[start]) - while1 = False - if start_op in (JUMP_FORWARD, JUMP_ABSOLUTE): - ## This is a while 1 (has a particular structure) - start = self.__get_target(code, start, start_op) - start = self.__restrict_to_parent(start, parent) - self.__while1[pos+3] = start ## map between the 2 start points - while1 = True - if start_op == JUMP_ABSOLUTE and ord(code[pos+6])==JUMP_IF_FALSE: - # special `while 1: pass` in python2.3 - self.__fixed_jumps[pos+3] = start - target = self.__get_target(code, pos, op) - end = self.__restrict_to_parent(target, parent) - if target != end: - self.__fixed_jumps[pos] = end - jump_back = self.__last_instr(code, start, end, JUMP_ABSOLUTE, - start, False) - assert(jump_back is not None) - target = self.__get_target(code, jump_back, JUMP_ABSOLUTE) - i = target - while i < jump_back and ord(code[i])==SET_LINENO: - i += 3 - if ord(code[i]) in (FOR_ITER, GET_ITER): - loop_type = 'FOR' - else: - lookup = [JUMP_IF_FALSE, JUMP_IF_TRUE] - test = self.__first_instr(code, pos+3, jump_back, lookup, jump_back+3) - if test is None: - # this is a special while 1 structure in 
python 2.4 - while1 = True - else: - #assert(test is not None) - test_target = self.__get_target(code, test) - test_target = self.__restrict_to_parent(test_target, parent) - next = (ord(code[test_target]), ord(code[test_target+1])) - if next == (POP_TOP, POP_BLOCK): - self.__ignored_ifs.append(test) - else: - while1 = True - if while1 == True: - loop_type = 'WHILE1' - else: - loop_type = 'WHILE' - - self.__loops.append(target) - self.__structs.append({'type': loop_type, - 'start': target, - 'end': jump_back}) - self.__structs.append({'type': loop_type + '_ELSE', - 'start': jump_back+3, - 'end': end}) - elif self.__list_comprehension(code, pos, op): - get_iter = self.__first_instr(code, pos+7, end, GET_ITER) - for_iter = self.__first_instr(code, get_iter, end, FOR_ITER) - assert(get_iter is not None and for_iter is not None) - start = get_iter+1 - target = self.__get_target(code, for_iter, FOR_ITER) - end = self.__restrict_to_parent(target, parent) - jump_back = self.__last_instr(code, start, end, JUMP_ABSOLUTE, - start, False) - assert(jump_back is not None) - target = self.__get_target(code, jump_back, JUMP_ABSOLUTE) - start = self.__restrict_to_parent(target, parent) - self.__structs.append({'type': 'LIST_COMPREHENSION', - 'start': start, - 'end': jump_back}) - elif op == SETUP_EXCEPT: - start = pos+3 - target = self.__get_target(code, pos, op) - # this should be redundant as it can't be out of boundaries -Dan - # check if it can be removed - end = self.__restrict_to_parent(target, parent) - if target != end: - #print "!!!!found except target != end: %s %s" % (target, end) - self.__fixed_jumps[pos] = end - ## Add the try block - self.__structs.append({'type': 'TRY', - 'start': start, - 'end': end-4}) - ## Now isolate the except and else blocks - start = end - target = self.__get_target(code, start-3) - #self.__fix_parent(code, target, parent) - try_else_start = target - end = self.__restrict_to_parent(target, parent) - if target != end: - 
self.__fixed_jumps[start-3] = end - - end_finally = self.__last_instr(code, start, end, END_FINALLY) - assert(end_finally is not None) - lookup = [JUMP_ABSOLUTE, JUMP_FORWARD] - jump_end = self.__last_instr(code, start, end_finally, lookup) - assert(jump_end is not None) - - target = self.__get_target(code, jump_end) - if target == try_else_start: - end = end_finally+1 - else: - end = self.__restrict_to_parent(target, parent) - if target != end: - self.__fixed_jumps[jump_end] = end - - ## Add the try-else block - self.__structs.append({'type': 'TRY_ELSE', - 'start': end_finally+1, - 'end': end}) - ## Add the except blocks - i = start - while i < end_finally: - jmp = self.__next_except_jump(code, i, end_finally, target) - if jmp is None: - break - if i!=start and ord(code[i])==POP_TOP: - pos = i + 1 - else: - pos = i - self.__structs.append({'type': 'EXCEPT', - 'start': pos, - 'end': jmp}) - if target != end: - self.__fixed_jumps[jmp] = end - i = jmp+3 - elif op == JUMP_ABSOLUTE: - ## detect if we have a 'foo and bar and baz...' structure - ## that was optimized (thus the presence of JUMP_ABSOLUTE) - return # no longer needed. just return. remove this elif later -Dan - if pos in self.__fixed_jumps: - return ## Already marked - if parent['end'] - pos < 7: - return - next = (ord(code[pos+3]), ord(code[pos+6])) - if next != (JUMP_IF_FALSE, POP_TOP): - return - - end = self.__get_target(code, pos+3) - ifs = self.__all_instr(code, pos, end, JUMP_IF_FALSE, end) - - ## Test if all JUMP_IF_FALSE we have found belong to the - ## structure (may not be needed but it doesn't hurt) - count = len(ifs) - if count < 2: - return - for jif in ifs[1:]: - before = ord(code[jif-3]) - after = ord(code[jif+3]) - if (before not in (JUMP_FORWARD, JUMP_ABSOLUTE) or - after != POP_TOP): - return - - ## All tests passed. 
Perform fixes - self.__ignored_ifs.extend(ifs) - for i in range(count-1): - self.__fixed_jumps[ifs[i]-3] = ifs[i+1]-3 - elif op in (JUMP_IF_FALSE, JUMP_IF_TRUE): - if self.__ignore_if(code, pos): - return - start = pos+4 ## JUMP_IF_FALSE/TRUE + POP_TOP - target = self.__get_target(code, pos, op) - if parent['start'] <= target <= parent['end']: - if ord(code[target-3]) in (JUMP_ABSOLUTE, JUMP_FORWARD): - if_end = self.__get_target(code, target-3) - #self.__fix_parent(code, if_end, parent) - end = self.__restrict_to_parent(if_end, parent) - if ord(code[end-3]) == JUMP_ABSOLUTE: - else_end = self.__get_target(code, end-3) - if if_end == else_end and if_end in self.__loops: - end -= 3 ## skip the continue instruction - if if_end != end: - self.__fixed_jumps[target-3] = end - self.__structs.append({'type': 'IF_THEN', - 'start': start, - 'end': target-3}) - self.__structs.append({'type': 'IF_ELSE', - 'start': target+1, - 'end': end}) - else: - self.__structs.append({'type': 'LOGIC_TEST', - 'start': start, - 'end': target}) - - def find_jump_targets(self, code): - """ - Detect all offsets in a byte code which are jump targets. - - Return the list of offsets. - - This procedure is modelled after self.opc.findlables(), but here - for each target the number of jumps are counted. 
- """ - HAVE_ARGUMENT = self.opc.HAVE_ARGUMENT - - hasjrel = self.opc.hasjrel - hasjabs = self.opc.hasjabs - - needFixing = (self.__pyversion >= 2.3) - - n = len(code) - self.__structs = [{'type': 'root', - 'start': 0, - 'end': n-1}] - self.__loops = [] ## All loop entry points - self.__while1 = {} ## 'while 1:' in python 2.3+ has another start point - self.__fixed_jumps = {} ## Map fixed jumps to their real destination - self.__ignored_ifs = [] ## JUMP_IF_XXXX's we should ignore - - targets = {} - i = 0 - while i < n: - op = ord(code[i]) - - if needFixing: - ## Determine structures and fix jumps for 2.3+ - self.__detect_structure(code, i, op) - - if op >= HAVE_ARGUMENT: - label = self.__fixed_jumps.get(i) - if label is None: - oparg = ord(code[i+1]) + ord(code[i+2]) * 256 - if op in hasjrel: - label = i + 3 + oparg - elif op in hasjabs: - # todo: absolute jumps - pass - if label is not None: - targets[label] = targets.get(label, 0) + 1 - i += 3 - else: - i += 1 - return targets - - def find_structures(self, code): - """ - Detect all structures in a byte code. - - Return a mapping from offset to a list of keywords that should - be inserted at that position. 
- """ - HAVE_ARGUMENT = self.opc.HAVE_ARGUMENT - - n = len(code) - self.__structs = [{'type': 'root', - 'start': 0, - 'end': n-1}] - self.__loops = [] ## All loop entry points - self.__while1 = {} ## 'while 1:' in python 2.3+ has another start point - self.__fixed_jumps = {} ## Map fixed jumps to their real destination - self.__ignored_ifs = [] ## JUMP_IF_XXXX's we should ignore - - i = 0 - while i < n: - op = ord(code[i]) - if op >= HAVE_ARGUMENT: - i += 3 - else: - i += 1 - #from pprint import pprint - #print - #print "structures: ", - #pprint(self.__structs) - #print "loops: ", - #pprint(self.__loops) - #print "while1: ", - #pprint(self.__while1) - #print "fixed jumps: ", - #pprint(self.__fixed_jumps) - #print "ignored ifs: ", - #pprint(self.__ignored_ifs) - #print - points = {} - endpoints = {} - for s in self.__structs: - typ = s['type'] - start = s['start'] - end = s['end'] - if typ == 'root': - continue - ## startpoints of the outer structures must come first - ## endpoints of the inner structures must come first - points.setdefault(start, []).append("%s_START" % typ) - endpoints.setdefault(end, []).insert(0, "%s_END" % typ) - for k, v in endpoints.items(): - points.setdefault(k, []).extend(v) - #print "points: ", - #pprint(points) - #print - return points - -# __scanners = {} - -# def getscanner(version): -# if not __scanners.has_key(version): -# __scanners[version] = Scanner(version) -# return __scanners[version] - -if __name__ == "__main__": - from uncompyle6 import PYTHON_VERSION - if PYTHON_VERSION == 2.3: - import inspect - co = inspect.currentframe().f_code - tokens, customize = Scanner23().disassemble(co) - for t in tokens: - print(t.format()) - else: - print("Need to be Python 2.3 to demo; I am %s." 
% - PYTHON_VERSION) - - -# local variables: -# tab-width: 4 +import uncompyle6.scanners.scanner24 as scan + +# bytecode verification, verify(), uses JUMP_OPs from here +from xdis.opcodes import opcode_23 +JUMP_OPs = opcode_23.JUMP_OPs + +# We base this off of 2.4 instead of the other way around +# because we cleaned things up this way. +# The history is that 2.7 support is the cleanest, +# then from that we got 2.6 and so on. +class Scanner23(scan.Scanner24): + def __init__(self, show_asm): + scan.Scanner24.__init__(self, show_asm) + # These are the only differences in initialization between + # 2.3-2.6 + self.version = 2.3 + self.genexpr_name = ''; + return