#  Copyright (c) 2015-2017 Rocky Bernstein
#  Copyright (c) 2005 by Dan Pascu
#  Copyright (c) 2000-2002 by hartmut Goebel
#  Copyright (c) 1999 John Aycock
#
#  See LICENSE for license
"""
A spark grammar for Python 3.x.

However instead of terminal symbols being the usual ASCII text,
e.g. 5, myvariable, "for", etc. they are CPython Bytecode tokens,
e.g. "LOAD_CONST 5", "STORE NAME myvariable", "SETUP_LOOP", etc.

If we succeed in creating a parse tree, then we have a Python program
that a later phase can turn into a sequence of ASCII text.
"""

from __future__ import print_function

from uncompyle6.parser import PythonParser, PythonParserSingle, nop_func
from uncompyle6.parsers.astnode import AST
from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG
from xdis import PYTHON3


class Python3Parser(PythonParser):
    # Grammar for Python 3.x bytecode.  The docstrings of the p_* methods
    # are grammar text, not prose: keep one rule per line and keep "#"
    # comments on lines of their own.

    def __init__(self, debug_parser=PARSER_DEFAULT_DEBUG):
        """Set up the rule bookkeeping sets and initialize the base parser.

        AST and 'stmts' are handed to PythonParser.__init__ (presumably the
        tree node class and the grammar start symbol -- confirm against
        uncompyle6.parser).
        """
        # NOTE(review): added_rules is created before the base initializer
        # runs, presumably because rule registration can happen during it.
        self.added_rules = set()
        super(Python3Parser, self).__init__(AST, 'stmts', debug=debug_parser)
        self.new_rules = set()

    def p_comprehension3(self, args):
        """
        # Python3 scanner adds LOAD_LISTCOMP. Python3 does list comprehension like
        # other comprehensions (set, dictionary).

        # Our "continue" heuristic - in two successive JUMP_BACKS, the first
        # one may be a continue - sometimes classifies a JUMP_BACK
        # as a CONTINUE. The two are kind of the same in a comprehension.

        comp_for ::= expr _for store comp_iter CONTINUE
        comp_for ::= expr _for store comp_iter JUMP_BACK

        list_for ::= expr FOR_ITER store list_iter jb_or_c

        # This is seen in PyPy, but possibly it appears on other Python 3?
        list_if ::= expr jmp_false list_iter COME_FROM
        list_if_not ::= expr jmp_true list_iter COME_FROM

        jb_or_c ::= JUMP_BACK
        jb_or_c ::= CONTINUE

        stmt ::= setcomp_func

        setcomp_func ::= BUILD_SET_0 LOAD_FAST FOR_ITER store comp_iter
                JUMP_BACK RETURN_VALUE RETURN_LAST

        setcomp_func ::= BUILD_SET_0 LOAD_FAST FOR_ITER store comp_iter
                COME_FROM JUMP_BACK RETURN_VALUE RETURN_LAST

        comp_body ::= dict_comp_body
        comp_body ::= set_comp_body
        dict_comp_body ::= expr expr MAP_ADD
        set_comp_body ::= expr SET_ADD

        # See also common Python p_list_comprehension
        """

    def p_dictcomp3(self, args):
        """
        expr ::= dict_comp
        stmt ::= dictcomp_func
        dictcomp_func ::= BUILD_MAP_0 LOAD_FAST FOR_ITER store
                comp_iter JUMP_BACK RETURN_VALUE RETURN_LAST
        dict_comp ::= LOAD_DICTCOMP LOAD_CONST MAKE_FUNCTION_0 expr GET_ITER CALL_FUNCTION_1
        """

    def p_grammar(self, args):
        '''
        sstmt ::= stmt
        sstmt ::= ifelsestmtr
        sstmt ::= return_stmt RETURN_LAST

        return_if_stmts ::= return_if_stmt come_from_opt
        return_if_stmts ::= _stmts return_if_stmt
        return_if_stmt ::= ret_expr RETURN_END_IF

        stmt ::= break_stmt
        break_stmt ::= BREAK_LOOP

        stmt ::= continue_stmt
        continue_stmt ::= CONTINUE
        continue_stmt ::= CONTINUE_LOOP
        continue_stmts ::= _stmts lastl_stmt continue_stmt
        continue_stmts ::= lastl_stmt continue_stmt
        continue_stmts ::= continue_stmt

        stmt ::= raise_stmt0
        stmt ::= raise_stmt1
        stmt ::= raise_stmt2

        raise_stmt0 ::= RAISE_VARARGS_0
        raise_stmt1 ::= expr RAISE_VARARGS_1
        raise_stmt2 ::= expr expr RAISE_VARARGS_2

        del_stmt ::= delete_subscr
        delete_subscr ::= expr expr DELETE_SUBSCR
        del_stmt ::= expr DELETE_ATTR

        kwarg ::= LOAD_CONST expr
        kwargs ::= kwarg*

        classdef ::= build_class store

        # Python3 introduced LOAD_BUILD_CLASS
        # Other definitions are in a custom rule
        build_class ::= LOAD_BUILD_CLASS mkfunc expr call CALL_FUNCTION_3

        stmt ::= classdefdeco
        classdefdeco ::= classdefdeco1 store
        classdefdeco1 ::= expr classdefdeco1 CALL_FUNCTION_1
        classdefdeco1 ::= expr classdefdeco2 CALL_FUNCTION_1

        assert ::= assert_expr jmp_true LOAD_ASSERT RAISE_VARARGS_1 COME_FROM
        assert_expr ::= expr
        assert_expr ::= assert_expr_or
        assert_expr ::= assert_expr_and
        assert_expr_or ::= assert_expr jmp_true expr
        assert_expr_and ::= assert_expr jmp_false expr

        ifstmt ::= testexpr _ifstmts_jump

        testexpr ::= testfalse
        testexpr ::= testtrue
        testfalse ::= expr jmp_false
        testtrue ::= expr jmp_true

        _ifstmts_jump ::= return_if_stmts
        _ifstmts_jump ::= c_stmts_opt COME_FROM

        iflaststmt ::= testexpr c_stmts_opt JUMP_ABSOLUTE

        iflaststmtl ::= testexpr c_stmts_opt JUMP_BACK
        iflaststmtl ::= testexpr c_stmts_opt JUMP_BACK COME_FROM_LOOP

        # These are used to keep AST indices the same
        jump_forward_else ::= JUMP_FORWARD ELSE
        jump_absolute_else ::= JUMP_ABSOLUTE ELSE

        # Note: in if/else kinds of statements, we err on the side
        # of missing "else" clauses. Therefore we include grammar
        # rules with and without ELSE.

        ifelsestmt ::= testexpr c_stmts_opt JUMP_FORWARD else_suite opt_come_from_except
        ifelsestmt ::= testexpr c_stmts_opt jump_forward_else else_suite _come_from

        # ifelsestmt ::= testexpr c_stmts_opt jump_forward_else
        #                passstmt _come_from

        ifelsestmtc ::= testexpr c_stmts_opt JUMP_ABSOLUTE else_suitec
        ifelsestmtc ::= testexpr c_stmts_opt jump_absolute_else else_suitec

        ifelsestmtr ::= testexpr return_if_stmts return_stmts

        ifelsestmtl ::= testexpr c_stmts_opt JUMP_BACK else_suitel
        ifelsestmtl ::= testexpr c_stmts_opt cf_jump_back else_suitel
        cf_jump_back ::= COME_FROM JUMP_BACK

        # FIXME: this feels like a hack. Is it just 1 or two
        # COME_FROMs? the parsed tree for this and even with just the
        # one COME_FROM for Python 2.7 seems to associate the
        # COME_FROM targets from the wrong places

        trystmt ::= SETUP_EXCEPT suite_stmts_opt POP_BLOCK
                    try_middle opt_come_from_except

        # this is nested inside a trystmt
        tryfinallystmt ::= SETUP_FINALLY suite_stmts_opt
                           POP_BLOCK LOAD_CONST
                           COME_FROM_FINALLY suite_stmts_opt END_FINALLY

        tryelsestmt ::= SETUP_EXCEPT suite_stmts_opt POP_BLOCK
                        try_middle else_suite come_from_except_clauses

        tryelsestmt ::= SETUP_EXCEPT suite_stmts_opt POP_BLOCK
                        try_middle else_suite come_froms

        tryelsestmtc ::= SETUP_EXCEPT suite_stmts_opt POP_BLOCK
                         try_middle else_suitec come_from_except_clauses

        tryelsestmtl ::= SETUP_EXCEPT suite_stmts_opt POP_BLOCK
                         try_middle else_suitel come_from_except_clauses

        try_middle ::= jmp_abs COME_FROM except_stmts
                       END_FINALLY
        try_middle ::= jmp_abs COME_FROM_EXCEPT except_stmts
                       END_FINALLY

        # FIXME: remove this
        try_middle ::= JUMP_FORWARD COME_FROM except_stmts
                       END_FINALLY COME_FROM

        try_middle ::= JUMP_FORWARD COME_FROM except_stmts
                       END_FINALLY COME_FROM_EXCEPT

        except_stmts ::= except_stmts except_stmt
        except_stmts ::= except_stmt

        except_stmt ::= except_cond1 except_suite
        except_stmt ::= except_cond2 except_suite
        except_stmt ::= except_cond2 except_suite_finalize
        except_stmt ::= except

        ## FIXME: what's except_pop_except?
        except_stmt ::= except_pop_except

        # Python3 introduced POP_EXCEPT
        except_suite ::= c_stmts_opt POP_EXCEPT jump_except
        jump_except ::= JUMP_ABSOLUTE
        jump_except ::= JUMP_BACK
        jump_except ::= JUMP_FORWARD
        jump_except ::= CONTINUE

        # This is used in Python 3 in
        # "except ... as e" to remove 'e' after the c_stmts_opt finishes
        except_suite_finalize ::= SETUP_FINALLY c_stmts_opt except_var_finalize
                                  END_FINALLY _jump

        except_var_finalize ::= POP_BLOCK POP_EXCEPT LOAD_CONST COME_FROM_FINALLY
                                LOAD_CONST store del_stmt

        except_suite ::= return_stmts

        except_cond1 ::= DUP_TOP expr COMPARE_OP
                         jmp_false POP_TOP POP_TOP POP_TOP

        except_cond2 ::= DUP_TOP expr COMPARE_OP
                         jmp_false POP_TOP store POP_TOP

        except ::= POP_TOP POP_TOP POP_TOP c_stmts_opt POP_EXCEPT _jump
        except ::= POP_TOP POP_TOP POP_TOP return_stmts

        jmp_abs ::= JUMP_ABSOLUTE
        jmp_abs ::= JUMP_BACK

        withstmt ::= expr SETUP_WITH POP_TOP suite_stmts_opt
                     POP_BLOCK LOAD_CONST COME_FROM_WITH
                     WITH_CLEANUP END_FINALLY

        withasstmt ::= expr SETUP_WITH store suite_stmts_opt
                       POP_BLOCK LOAD_CONST COME_FROM_WITH
                       WITH_CLEANUP END_FINALLY

        ## FIXME: Right now we have erroneous jump targets
        ## This below is probably not correct when the COME_FROM is put in the right place
        and ::= expr jmp_false expr COME_FROM
        or ::= expr jmp_true expr COME_FROM

        # # something like the below is needed when the jump targets are fixed
        ## or ::= expr JUMP_IF_TRUE_OR_POP COME_FROM expr
        ## and ::= expr JUMP_IF_FALSE_OR_POP COME_FROM expr
        '''

    def p_misc3(self, args):
        """
        try_middle ::= JUMP_FORWARD COME_FROM_EXCEPT except_stmts
                       END_FINALLY COME_FROM
        try_middle ::= JUMP_FORWARD COME_FROM_EXCEPT except_stmts
                       END_FINALLY COME_FROM_EXCEPT_CLAUSE

        for_block ::= l_stmts_opt come_from_loops JUMP_BACK
        for_block ::= l_stmts
        iflaststmtl ::= testexpr c_stmts_opt
        """

    def p_def_annotations3(self, args):
        """
        # Annotated functions
        stmt ::= function_def_annotate
        function_def_annotate ::= mkfunc_annotate store

        mkfuncdeco0 ::= mkfunc_annotate

        # This has the annotation value.
        # LOAD_NAME is used in an annotation type like
        # int, float, str
        annotate_arg ::= LOAD_NAME
        # LOAD_CONST is used in an annotation string
        annotate_arg ::= expr

        # This stores the tuple of parameter names
        # that have been annotated
        annotate_tuple ::= LOAD_CONST
        """

    def p_come_from3(self, args):
        """
        opt_come_from_except ::= COME_FROM_EXCEPT
        opt_come_from_except ::= come_froms
        opt_come_from_except ::= come_from_except_clauses

        come_froms ::= COME_FROM*
        come_from_except_clauses ::= COME_FROM_EXCEPT_CLAUSE+
        come_from_loops ::= COME_FROM_LOOP*
        """

    def p_jump3(self, args):
        """
        jmp_false ::= POP_JUMP_IF_FALSE
        jmp_true ::= POP_JUMP_IF_TRUE

        # FIXME: Common with 2.7
        ret_and ::= expr JUMP_IF_FALSE_OR_POP ret_expr_or_cond COME_FROM
        ret_or ::= expr JUMP_IF_TRUE_OR_POP ret_expr_or_cond COME_FROM
        ret_cond ::= expr POP_JUMP_IF_FALSE expr RETURN_END_IF ret_expr_or_cond

        or ::= expr JUMP_IF_TRUE_OR_POP expr COME_FROM
        and ::= expr JUMP_IF_FALSE_OR_POP expr COME_FROM

        # compare_chained1 is used exclusively in chained_compare
        compare_chained1 ::= expr DUP_TOP ROT_THREE COMPARE_OP JUMP_IF_FALSE_OR_POP
                             compare_chained1 COME_FROM
        compare_chained1 ::= expr DUP_TOP ROT_THREE COMPARE_OP JUMP_IF_FALSE_OR_POP
                             compare_chained2 COME_FROM
        """

    def p_stmt3(self, args):
        """
        stmt ::= return_closure
        return_closure ::= LOAD_CLOSURE RETURN_VALUE RETURN_LAST

        stmt ::= whileTruestmt
        ifelsestmt ::= testexpr c_stmts_opt JUMP_FORWARD else_suite _come_from
        """

    def p_loop_stmt3(self, args):
        """
        forstmt ::= SETUP_LOOP expr _for store for_block POP_BLOCK
                    come_from_loops

        forelsestmt ::= SETUP_LOOP expr _for store for_block POP_BLOCK else_suite
                        COME_FROM_LOOP

        forelselaststmt ::= SETUP_LOOP expr _for store for_block POP_BLOCK else_suitec
                            COME_FROM_LOOP

        forelselaststmtl ::= SETUP_LOOP expr _for store for_block POP_BLOCK else_suitel
                             COME_FROM_LOOP

        whilestmt ::= SETUP_LOOP testexpr l_stmts_opt COME_FROM JUMP_BACK POP_BLOCK
                      COME_FROM_LOOP

        whilestmt ::= SETUP_LOOP testexpr l_stmts_opt JUMP_BACK POP_BLOCK
                      COME_FROM_LOOP

        whilestmt ::= SETUP_LOOP testexpr return_stmts POP_BLOCK
                      COME_FROM_LOOP

        while1elsestmt ::= SETUP_LOOP l_stmts JUMP_BACK
                           else_suite

        whileelsestmt ::= SETUP_LOOP testexpr l_stmts_opt JUMP_BACK POP_BLOCK
                          else_suite COME_FROM_LOOP

        whileTruestmt ::= SETUP_LOOP l_stmts_opt JUMP_BACK POP_BLOCK
                          COME_FROM_LOOP

        # FIXME: Python 3.? starts adding branch optimization? Put this starting there.
        while1stmt ::= SETUP_LOOP l_stmts
        while1stmt ::= SETUP_LOOP l_stmts COME_FROM_LOOP

        while1stmt ::= SETUP_LOOP l_stmts COME_FROM JUMP_BACK COME_FROM_LOOP

        while1elsestmt ::= SETUP_LOOP l_stmts JUMP_BACK
                           else_suite COME_FROM_LOOP

        # FIXME: investigate - can code really produce a NOP?
        whileTruestmt ::= SETUP_LOOP l_stmts_opt JUMP_BACK NOP
                          COME_FROM_LOOP
        whileTruestmt ::= SETUP_LOOP l_stmts_opt JUMP_BACK POP_BLOCK NOP
                          COME_FROM_LOOP
        forstmt ::= SETUP_LOOP expr _for store for_block POP_BLOCK NOP
                    COME_FROM_LOOP
        """

    def p_generator_exp3(self, args):
        '''
        load_genexpr ::= LOAD_GENEXPR
        load_genexpr ::= BUILD_TUPLE_1 LOAD_GENEXPR LOAD_CONST

        # Is there something general going on here?
        dict_comp ::= load_closure LOAD_DICTCOMP LOAD_CONST MAKE_CLOSURE_0 expr GET_ITER CALL_FUNCTION_1
        '''

    def p_expr3(self, args):
        """
        expr ::= conditionalnot
        conditionalnot ::= expr jmp_true expr jump_forward_else expr COME_FROM

        # a JUMP_FORWARD to another JUMP_FORWARD can get turned into
        # a JUMP_ABSOLUTE with no COME_FROM
        conditional ::= expr jmp_false expr jump_absolute_else expr
        """

    @staticmethod
    def call_fn_name(token):
        """Customize CALL_FUNCTION to add the number of positional arguments"""
        if token.attr is not None:
            return '%s_%i' % (token.kind, token.attr)
        else:
            return '%s_0' % (token.kind)

    def custom_build_class_rule(self, opname, i, token, tokens, customize):
        '''
        Add a build_class rule customized to the CALL_FUNCTION token that
        follows the LOAD_BUILD_CLASS found at index "i" of "tokens".

        # Should the first rule be somehow folded into the 2nd one?
        build_class ::= LOAD_BUILD_CLASS mkfunc
                        LOAD_CLASSNAME {expr}^n-1 CALL_FUNCTION_n
                        LOAD_CONST CALL_FUNCTION_n
        build_class ::= LOAD_BUILD_CLASS mkfunc
                        expr
                        call_function
                        CALL_FUNCTION_3
        '''
        # FIXME: I bet this can be simplified
        # look for next MAKE_FUNCTION
        for i in range(i+1, len(tokens)):
            if tokens[i].kind.startswith('MAKE_FUNCTION'):
                break
            elif tokens[i].kind.startswith('MAKE_CLOSURE'):
                break
            pass
        assert i < len(tokens), "build_class needs to find MAKE_FUNCTION or MAKE_CLOSURE"
        assert tokens[i+1].kind == 'LOAD_CONST', \
            "build_class expecting CONST after MAKE_FUNCTION/MAKE_CLOSURE"

        # Start as None so that a missing CALL_FUNCTION trips the assert
        # below with its message rather than raising NameError.
        call_fn_tok = None
        for i in range(i, len(tokens)):
            if tokens[i].kind == 'CALL_FUNCTION':
                call_fn_tok = tokens[i]
                break
        assert call_fn_tok, "build_class custom rule needs to find CALL_FUNCTION"

        # customize build_class rule
        # FIXME: What's the deal with the two rules? Different Python versions?
        # Different situations? Note that the above rule is based on the CALL_FUNCTION
        # token found, while this one doesn't.
        call_function = self.call_fn_name(call_fn_tok)
        args_pos = call_fn_tok.attr & 0xff
        args_kw = (call_fn_tok.attr >> 8) & 0xff
        rule = ("build_class ::= LOAD_BUILD_CLASS mkfunc %s"
                "%s" % (('expr ' * (args_pos - 1) + ('kwarg ' * args_kw)),
                        call_function))
        self.add_unique_rule(rule, opname, token.attr, customize)
        return

    def custom_classfunc_rule(self, opname, token, customize,
                              possible_class_decorator,
                              seen_GET_AWAITABLE_YIELD_FROM, next_token):
        """
        Add "call" rules (and, where applicable, async_call and
        classdefdeco2 rules) sized to the argument counts in token.attr.
        Note that token.kind is rewritten in place to CALL_FUNCTION..._n.

        call ::= expr {expr}^n CALL_FUNCTION_n
        call ::= expr {expr}^n CALL_FUNCTION_VAR_n
        call ::= expr {expr}^n CALL_FUNCTION_VAR_KW_n
        call ::= expr {expr}^n CALL_FUNCTION_KW_n

        classdefdeco2 ::= LOAD_BUILD_CLASS mkfunc {expr}^n-1 CALL_FUNCTION_n
        """
        # Low byte indicates number of positional parameters,
        # high byte number of keyword parameters
        args_pos = token.attr & 0xff
        args_kw = (token.attr >> 8) & 0xff

        # args_ann = (token.attr >> 16) & 0x7FFF

        # Additional exprs for * and ** args:
        #  0 if neither
        #  1 for CALL_FUNCTION_VAR or CALL_FUNCTION_KW
        #  2 for * and ** args (CALL_FUNCTION_VAR_KW).
        # Yes, this computation based on instruction name is a little bit hoaky.
        nak = (len(opname)-len('CALL_FUNCTION')) // 3

        token.kind = self.call_fn_name(token)
        uniq_param = args_kw + args_pos
        if self.version == 3.5 and opname.startswith('CALL_FUNCTION_VAR'):
            # Python 3.5 changes the stack position of *args. KW args come
            # after *args.

            # Python 3.6+ replaces CALL_FUNCTION_VAR_KW with CALL_FUNCTION_EX
            if opname.endswith('KW'):
                kw = 'expr '
            else:
                kw = ''
            rule = ('call ::= expr expr ' +
                    ('pos_arg ' * args_pos) +
                    ('kwarg ' * args_kw) + kw + token.kind)
            self.add_unique_rule(rule, token.kind, uniq_param, customize)

        if self.version >= 3.6 and opname == 'CALL_FUNCTION_EX_KW':
            rule = ('call36 ::= '
                    'expr build_tuple_unpack_with_call build_map_unpack_with_call '
                    'CALL_FUNCTION_EX_KW_1')
            self.add_unique_rule(rule, token.kind, uniq_param, customize)
            rule = 'call ::= call36'
        else:
            rule = ('call ::= expr ' +
                    ('pos_arg ' * args_pos) +
                    ('kwarg ' * args_kw) +
                    'expr ' * nak + token.kind)

        self.add_unique_rule(rule, token.kind, uniq_param, customize)

        if self.version >= 3.5 and seen_GET_AWAITABLE_YIELD_FROM:
            rule = ('async_call ::= expr ' +
                    ('pos_arg ' * args_pos) +
                    ('kwarg ' * args_kw) +
                    'expr ' * nak + token.kind +
                    ' GET_AWAITABLE LOAD_CONST YIELD_FROM')
            self.add_unique_rule(rule, token.kind, uniq_param, customize)
            self.add_unique_rule('expr ::= async_call', token.kind, uniq_param, customize)

        if possible_class_decorator:
            if next_token == 'CALL_FUNCTION' and next_token.attr == 1:
                rule = ('classdefdeco2 ::= LOAD_BUILD_CLASS mkfunc %s%s_%d'
                        % (('expr ' * (args_pos-1)), opname, args_pos))
                self.add_unique_rule(rule, token.kind, uniq_param, customize)

    def add_make_function_rule(self, rule, opname, attr, customize):
        """Python 3.3 added an additional LOAD_CONST before MAKE_FUNCTION and
        this has an effect on many rules.

        "rule" is a pattern containing a single %s; it is filled with
        'LOAD_CONST ' for version >= 3.3 and with '' otherwise, and the
        result is registered through add_unique_rule.
        """
        new_rule = rule % (('LOAD_CONST ') * (1 if self.version >= 3.3 else 0))
        self.add_unique_rule(new_rule, opname, attr, customize)

    def add_custom_rules(self, tokens, customize):
        """
        Special handling for opcodes such as those that take a variable number
        of arguments -- we add a new rule for each:

            unpack_list ::= UNPACK_LIST_n {expr}^n
            unpack      ::= UNPACK_TUPLE_n {expr}^n
            unpack      ::= UNPACK_SEQUENCE_n {expr}^n
            unpack_ex   ::= UNPACK_EX_b_a {expr}^(a+b)

            # build_class (see load_build_class)

            # Even the below say _list, in the semantic rules we
            # disambiguate tuples, and sets from lists

            list        ::= {expr}^n BUILD_LIST_n
            list        ::= {expr}^n BUILD_TUPLE_n
            list        ::= {expr}^n BUILD_LIST_UNPACK_n
            list        ::= {expr}^n BUILD_TUPLE_UNPACK_n

            # FIXME:
            list        ::= {expr}^n BUILD_SET_n
            list        ::= {expr}^n BUILD_SET_UNPACK_n
            should be
            build_set   ::= {expr}^n BUILD_SET_n
            build_set   ::= {expr}^n BUILD_SET_UNPACK_n

            load_closure ::= {LOAD_CLOSURE}^n BUILD_TUPLE_n
            # call (see custom_classfunc_rule)

            # ------------
            # Python <= 3.2 omits LOAD_CONST before MAKE_
            # Note: are the below specific instances of a more general case?
            # ------------

            # Is there something more general than this? adding pos_arg?
            # Is there something corresponding using MAKE_CLOSURE?
            dict_comp   ::= LOAD_DICTCOMP [LOAD_CONST] MAKE_FUNCTION_0 expr
                            GET_ITER CALL_FUNCTION_1

            generator_exp ::= {pos_arg}^n load_genexpr [LOAD_CONST] MAKE_FUNCTION_n expr
                              GET_ITER CALL_FUNCTION_1
            generator_exp ::= {expr}^n load_closure LOAD_GENEXPR [LOAD_CONST]
                              MAKE_CLOSURE_n expr GET_ITER CALL_FUNCTION_1
            listcomp    ::= {pos_arg}^n LOAD_LISTCOMP [LOAD_CONST] MAKE_CLOSURE_n expr
                            GET_ITER CALL_FUNCTION_1
            listcomp    ::= {pos_arg}^n load_closure LOAD_LISTCOMP [LOAD_CONST]
                            MAKE_CLOSURE_n expr GET_ITER CALL_FUNCTION_1

            # Is there something more general than this? adding pos_arg?
            # Is there something corresponding using MAKE_CLOSURE?
            For example:
            # set_comp  ::= {pos_arg}^n LOAD_SETCOMP [LOAD_CONST] MAKE_CLOSURE_n
                            GET_ITER CALL_FUNCTION_1

            set_comp    ::= LOAD_SETCOMP [LOAD_CONST] MAKE_FUNCTION_0 expr
                            GET_ITER CALL_FUNCTION_1
            set_comp    ::= {pos_arg}^n load_closure LOAD_SETCOMP [LOAD_CONST]
                            MAKE_CLOSURE_n expr GET_ITER CALL_FUNCTION_1

            mkfunc      ::= {pos_arg}^n load_closure [LOAD_CONST] MAKE_FUNCTION_n
            mkfunc      ::= {pos_arg}^n load_closure [LOAD_CONST] MAKE_CLOSURE_n
            mkfunc      ::= {pos_arg}^n [LOAD_CONST] MAKE_FUNCTION_n
            mklambda    ::= {pos_arg}^n LOAD_LAMBDA [LOAD_CONST] MAKE_FUNCTION_n

        For PYPY:
            load_attr ::= expr LOOKUP_METHOD
            call      ::= expr CALL_METHOD
        """
        is_pypy = False
        seen_LOAD_BUILD_CLASS = False
        seen_GET_AWAITABLE_YIELD_FROM = False

        # Loop over instructions adding custom grammar rules based on
        # a specific instruction seen.

        if 'PyPy' in customize:
            is_pypy = True
            self.addRule("""
              stmt ::= assign3_pypy
              stmt ::= assign2_pypy
              assign3_pypy ::= expr expr expr store store store
              assign2_pypy ::= expr expr store store
              return_if_lambda ::= RETURN_END_IF_LAMBDA
              return_stmt_lambda ::= ret_expr RETURN_VALUE_LAMBDA
              stmt ::= conditional_lambda
              conditional_lambda ::= expr jmp_false expr return_if_lambda
                                     return_stmt_lambda LAMBDA_MARKER
              """, nop_func)

        # Pre-scan: note whether "GET_ITER CALL_FUNCTION_1" and
        # "GET_AWAITABLE LOAD_CONST YIELD_FROM" sequences occur; the scan
        # stops once more than two such sequences have been counted.
        has_get_iter_call_function1 = False
        n = len(tokens)
        max_branches = 0
        for i, token in enumerate(tokens):
            if (token == 'GET_ITER' and i < n-2
                    and self.call_fn_name(tokens[i+1]) == 'CALL_FUNCTION_1'):
                has_get_iter_call_function1 = True
                max_branches += 1
            elif (token == 'GET_AWAITABLE' and i < n-3
                  and tokens[i+1] == 'LOAD_CONST' and tokens[i+2] == 'YIELD_FROM'):
                max_branches += 1
                seen_GET_AWAITABLE_YIELD_FROM = True
            if max_branches > 2:
                break

        for i, token in enumerate(tokens):
            opname = token.kind
            opname_base = opname[:opname.rfind('_')]

            # The order of opname listed is roughly sorted below
            if opname_base == 'BUILD_CONST_KEY_MAP':
                # This is in 3.6+
                kvlist_n = 'expr ' * (token.attr)
                rule = "dict ::= %sLOAD_CONST %s" % (kvlist_n, opname)
                self.add_unique_rule(rule, opname, token.attr, customize)
            elif opname.startswith('BUILD_LIST_UNPACK'):
                v = token.attr
                rule = ('build_list_unpack ::= ' + 'expr1024 ' * int(v//1024) +
                        'expr32 ' * int((v//32) % 32) +
                        'expr ' * (v % 32) + opname)
                self.add_unique_rule(rule, opname, token.attr, customize)
                rule = 'expr ::= build_list_unpack'
                self.add_unique_rule(rule, opname, token.attr, customize)
            elif opname_base == 'BUILD_MAP':
                kvlist_n = "kvlist_%s" % token.attr
                if opname == 'BUILD_MAP_n':
                    # PyPy sometimes has no count. Sigh.
                    rule = ('dictcomp_func ::= BUILD_MAP_n LOAD_FAST FOR_ITER store '
                            'comp_iter JUMP_BACK RETURN_VALUE RETURN_LAST')
                    self.add_unique_rule(rule, 'dictomp_func', 1, customize)

                    kvlist_n = 'kvlist_n'
                    rule = 'kvlist_n ::= kvlist_n kv3'
                    self.add_unique_rule(rule, 'kvlist_n', 0, customize)
                    rule = 'kvlist_n ::='
                    self.add_unique_rule(rule, 'kvlist_n', 1, customize)
                    rule = "dict ::= BUILD_MAP_n kvlist_n"
                elif self.version >= 3.5:
                    if opname != 'BUILD_MAP_WITH_CALL':
                        if opname == 'BUILD_MAP_UNPACK':
                            rule = kvlist_n + ' ::= ' + 'expr ' * (token.attr*2)
                            self.add_unique_rule(rule, opname, token.attr, customize)
                            rule = 'dict_entry ::= ' + 'expr ' * (token.attr*2)
                            self.add_unique_rule(rule, opname, token.attr, customize)
                            rule = 'dict ::= ' + 'dict_entry ' * token.attr
                            self.add_unique_rule(rule, opname, token.attr, customize)
                            rule = ('unmap_dict ::= ' +
                                    ('dict ' * token.attr) +
                                    'BUILD_MAP_UNPACK')
                        else:
                            rule = kvlist_n + ' ::= ' + 'expr ' * (token.attr*2)
                            self.add_unique_rule(rule, opname, token.attr, customize)
                            rule = "dict ::= %s %s" % (kvlist_n, opname)
                else:
                    rule = kvlist_n + ' ::= ' + 'expr expr STORE_MAP ' * token.attr
                    self.add_unique_rule(rule, opname, token.attr, customize)
                    rule = "dict ::= %s %s" % (opname, kvlist_n)
                self.add_unique_rule(rule, opname, token.attr, customize)
            elif opname.startswith('BUILD_MAP_UNPACK_WITH_CALL'):
                v = token.attr
                rule = ('build_map_unpack_with_call ::= ' + 'expr1024 ' * int(v//1024) +
                        'expr32 ' * int((v//32) % 32) +
                        'expr ' * (v % 32) + opname)
                self.add_unique_rule(rule, opname, token.attr, customize)
            elif opname_base in ('BUILD_LIST', 'BUILD_SET', 'BUILD_TUPLE'):
                v = token.attr

                is_LOAD_CLOSURE = False
                if opname_base == 'BUILD_TUPLE':
                    # If is part of a "load_closure", then it is not part of a
                    # "list".
                    is_LOAD_CLOSURE = True
                    for j in range(v):
                        if tokens[i-j-1].kind != 'LOAD_CLOSURE':
                            is_LOAD_CLOSURE = False
                            break
                    if is_LOAD_CLOSURE:
                        rule = ('load_closure ::= %s%s' % (('LOAD_CLOSURE ' * v), opname))
                        self.add_unique_rule(rule, opname, token.attr, customize)
                if not is_LOAD_CLOSURE or v == 0:
                    rule = ('list ::= ' + 'expr1024 ' * int(v//1024) +
                            'expr32 ' * int((v//32) % 32) +
                            'expr ' * (v % 32) + opname)
                    self.add_unique_rule(rule, opname, token.attr, customize)
                continue
            elif opname_base == 'BUILD_SLICE':
                if token.attr == 2:
                    self.add_unique_rules([
                        'expr ::= build_slice2',
                        'build_slice2 ::= expr expr BUILD_SLICE_2'
                        ], customize)
                else:
                    # Report the offending value itself; no local "v" is
                    # set in this branch.
                    assert token.attr == 3, \
                        "BUILD_SLICE value must be 2 or 3; is %s" % token.attr
                    self.add_unique_rules([
                        'expr ::= build_slice3',
                        'build_slice3 ::= expr expr expr BUILD_SLICE_3',
                        ], customize)
            elif opname.startswith('BUILD_TUPLE_UNPACK_WITH_CALL'):
                v = token.attr
                rule = ('build_tuple_unpack_with_call ::= ' + 'expr1024 ' * int(v//1024) +
                        'expr32 ' * int((v//32) % 32) +
                        'expr ' * (v % 32) + opname)
                self.add_unique_rule(rule, opname, token.attr, customize)
            elif (opname in ('CALL_FUNCTION', 'CALL_FUNCTION_VAR',
                             'CALL_FUNCTION_VAR_KW', 'CALL_FUNCTION_EX_KW')
                  or opname.startswith('CALL_FUNCTION_KW')):
                # A call that is the very last token has no successor; pass
                # None, which compares unequal to 'CALL_FUNCTION' in the callee.
                next_token = tokens[i+1] if i + 1 < n else None
                self.custom_classfunc_rule(opname, token, customize,
                                           seen_LOAD_BUILD_CLASS,
                                           seen_GET_AWAITABLE_YIELD_FROM,
                                           next_token)
            elif opname_base == 'CALL_METHOD':
                # PyPy only - DRY with parse2

                # FIXME: The below argument parsing will be wrong when PyPy gets to 3.6
                args_pos = (token.attr & 0xff)       # positional parameters
                args_kw = (token.attr >> 8) & 0xff   # keyword parameters

                # number of apply equiv arguments:
                nak = (len(opname_base)-len('CALL_METHOD')) // 3
                rule = ('call ::= expr ' +
                        ('pos_arg ' * args_pos) +
                        ('kwarg ' * args_kw) +
                        'expr ' * nak + opname)
                self.add_unique_rule(rule, opname, token.attr, customize)
            elif opname == 'JUMP_IF_NOT_DEBUG':
                v = token.attr
                self.add_unique_rule(
                    "stmt ::= assert_pypy", opname, v, customize)
                self.add_unique_rule(
                    "stmt ::= assert2_pypy", opname_base, v, customize)
                self.add_unique_rule(
                    "assert_pypy ::= JUMP_IF_NOT_DEBUG assert_expr jmp_true "
                    "LOAD_ASSERT RAISE_VARARGS_1 COME_FROM",
                    opname, token.attr, customize)
                self.add_unique_rule(
                    "assert2_pypy ::= JUMP_IF_NOT_DEBUG assert_expr jmp_true "
                    "LOAD_ASSERT expr CALL_FUNCTION_1 RAISE_VARARGS_1 COME_FROM",
                    opname_base, v, customize)
                continue
            elif opname == 'LOAD_BUILD_CLASS':
                seen_LOAD_BUILD_CLASS = True
                self.custom_build_class_rule(opname, i, token, tokens, customize)
            elif opname == 'LOAD_CLASSDEREF':
                # Python 3.4+
                self.add_unique_rule("expr ::= LOAD_CLASSDEREF", opname, token.attr, customize)
                continue
            elif opname == 'LOAD_CLASSNAME':
                self.add_unique_rule("expr ::= LOAD_CLASSNAME", opname, token.attr, customize)
                continue
            elif opname == 'LOAD_DICTCOMP':
                if has_get_iter_call_function1:
                    rule_pat = ("dict_comp ::= LOAD_DICTCOMP %sMAKE_FUNCTION_0 expr "
                                "GET_ITER CALL_FUNCTION_1")
                    self.add_make_function_rule(rule_pat, opname, token.attr, customize)
                # listcomp is a custom Python3 rule
            elif opname == 'LOAD_LISTCOMP':
                self.add_unique_rule("expr ::= listcomp", opname, token.attr, customize)
            elif opname == 'LOAD_SETCOMP':
                # Should this be generalized and put under MAKE_FUNCTION?
                if has_get_iter_call_function1:
                    self.add_unique_rule("expr ::= set_comp", opname, token.attr, customize)
                    rule_pat = ("set_comp ::= LOAD_SETCOMP %sMAKE_FUNCTION_0 expr "
                                "GET_ITER CALL_FUNCTION_1")
                    self.add_make_function_rule(rule_pat, opname, token.attr, customize)
            elif opname == 'LOOKUP_METHOD':
                # A PyPy speciality - DRY with parse2
                self.add_unique_rule("load_attr ::= expr LOOKUP_METHOD",
                                     opname, token.attr, customize)
                continue
            elif opname.startswith('MAKE_CLOSURE'):
                # DRY with MAKE_FUNCTION
                # Note: this probably doesn't handle kwargs properly
                args_pos, args_kw, annotate_args = token.attr

                # FIXME: Fold test into add_make_function_rule
                j = 1 if self.version < 3.3 else 2
                if is_pypy or (i >= j and tokens[i-j] == 'LOAD_LAMBDA'):
                    rule_pat = ('mklambda ::= %sload_closure LOAD_LAMBDA %%s%s' %
                                ('pos_arg ' * args_pos, opname))
                    self.add_make_function_rule(rule_pat, opname, token.attr, customize)

                if has_get_iter_call_function1:
                    rule_pat = ("generator_exp ::= %sload_closure load_genexpr %%s%s expr "
                                "GET_ITER CALL_FUNCTION_1" % ('pos_arg ' * args_pos, opname))
                    self.add_make_function_rule(rule_pat, opname, token.attr, customize)

                if has_get_iter_call_function1:
                    if (is_pypy or (i >= j and tokens[i-j] == 'LOAD_LISTCOMP')):
                        # In the tokens we saw:
                        #   LOAD_LISTCOMP LOAD_CONST MAKE_FUNCTION (>= 3.3) or
                        #   LOAD_LISTCOMP MAKE_FUNCTION (< 3.3) or
                        # and have GET_ITER CALL_FUNCTION_1
                        # Todo: For Pypy we need to modify this slightly
                        rule_pat = ('listcomp ::= %sload_closure LOAD_LISTCOMP %%s%s expr '
                                    'GET_ITER CALL_FUNCTION_1' % ('pos_arg ' * args_pos, opname))
                        self.add_make_function_rule(rule_pat, opname, token.attr, customize)
                    if (is_pypy or (i >= j and tokens[i-j] == 'LOAD_SETCOMP')):
                        rule_pat = ('set_comp ::= %sload_closure LOAD_SETCOMP %%s%s expr '
                                    'GET_ITER CALL_FUNCTION_1' % ('pos_arg ' * args_pos, opname))
                        self.add_make_function_rule(rule_pat, opname, token.attr, customize)
                    if (is_pypy or (i >= j and tokens[i-j] == 'LOAD_DICTCOMP')):
                        self.add_unique_rule('dict_comp ::= %sload_closure LOAD_DICTCOMP %s '
                                             'expr GET_ITER CALL_FUNCTION_1' %
                                             ('pos_arg ' * args_pos, opname),
                                             opname, token.attr, customize)

                # FIXME: kwarg processing is missing here.
                # Note order of kwargs and pos args changed between 3.3-3.4
                if self.version <= 3.2:
                    rule = ('mkfunc ::= kwargs %sload_closure LOAD_CONST kwargs %s'
                            % ('expr ' * args_pos, opname))
                elif self.version == 3.3:
                    rule = ('mkfunc ::= kwargs %sload_closure LOAD_CONST LOAD_CONST %s'
                            % ('expr ' * args_pos, opname))
                elif self.version >= 3.4:
                    rule = ('mkfunc ::= %skwargs load_closure LOAD_CONST LOAD_CONST %s'
                            % ('expr ' * args_pos, opname))

                self.add_unique_rule(rule, opname, token.attr, customize)
                rule = ('mkfunc ::= %sload_closure load_genexpr %s'
                        % ('pos_arg ' * args_pos, opname))
                self.add_unique_rule(rule, opname, token.attr, customize)
                if self.version < 3.4:
                    rule = ('mkfunc ::= %sload_closure LOAD_CONST %s'
                            % ('expr ' * args_pos, opname))
                    self.add_unique_rule(rule, opname, token.attr, customize)

                pass
            elif opname_base.startswith('MAKE_FUNCTION'):
                # DRY with MAKE_CLOSURE
                if self.version >= 3.6:
                    # The semantics of MAKE_FUNCTION in 3.6 are totally different from
                    # before.
                    args_pos, args_kw, annotate_args, closure = token.attr
                    stack_count = args_pos + args_kw + annotate_args
                    rule = ('mkfunc ::= %s%s%s%s' %
                            ('expr ' * stack_count,
                             'load_closure ' * closure,
                             'LOAD_CONST ' * 2,
                             opname))
                    self.add_unique_rule(rule, opname, token.attr, customize)
                    if has_get_iter_call_function1:
                        rule_pat = ("generator_exp ::= %sload_closure load_genexpr %%s%s expr "
                                    "GET_ITER CALL_FUNCTION_1" % ('pos_arg ' * args_pos, opname))
                        self.add_make_function_rule(rule_pat, opname, token.attr, customize)
                        if is_pypy or (i >= 2 and tokens[i-2] == 'LOAD_LISTCOMP'):
                            rule_pat = ("listcomp ::= %sLOAD_LISTCOMP %%s%s expr "
                                        "GET_ITER CALL_FUNCTION_1" % ('expr ' * args_pos, opname))
                            self.add_make_function_rule(rule_pat, opname, token.attr, customize)

                    if is_pypy or (i >= 2 and tokens[i-2] == 'LOAD_LAMBDA'):
                        rule_pat = ('mklambda ::= %s%sLOAD_LAMBDA %%s%s' %
                                    (('pos_arg ' * args_pos),
                                     ('kwarg ' * args_kw),
                                     opname))
                        self.add_make_function_rule(rule_pat, opname, token.attr, customize)
                    continue

                # Only versions below 3.6 get here (3.6+ hit "continue" above);
                # the else arm is kept as written in the original.
                if self.version < 3.6:
                    args_pos, args_kw, annotate_args = token.attr
                else:
                    args_pos, args_kw, annotate_args, closure = token.attr

                j = 1 if self.version < 3.3 else 2
                if has_get_iter_call_function1:
                    rule_pat = ("generator_exp ::= %sload_genexpr %%s%s expr "
                                "GET_ITER CALL_FUNCTION_1" % ('pos_arg ' * args_pos, opname))
                    self.add_make_function_rule(rule_pat, opname, token.attr, customize)

                    if is_pypy or (i >= j and tokens[i-j] == 'LOAD_LISTCOMP'):
                        # In the tokens we saw:
                        #   LOAD_LISTCOMP LOAD_CONST MAKE_FUNCTION (>= 3.3) or
                        #   LOAD_LISTCOMP MAKE_FUNCTION (< 3.3) or
                        # and have GET_ITER CALL_FUNCTION_1
                        # Todo: For Pypy we need to modify this slightly
                        rule_pat = ("listcomp ::= %sLOAD_LISTCOMP %%s%s expr "
                                    "GET_ITER CALL_FUNCTION_1" % ('expr ' * args_pos, opname))
                        self.add_make_function_rule(rule_pat, opname, token.attr, customize)

                # FIXME: Fold test into add_make_function_rule
                if is_pypy or (i >= j and tokens[i-j] == 'LOAD_LAMBDA'):
                    rule_pat = ('mklambda ::= %s%sLOAD_LAMBDA %%s%s' %
                                (('pos_arg ' * args_pos),
                                 ('kwarg ' * args_kw),
                                 opname))
                    self.add_make_function_rule(rule_pat, opname, token.attr, customize)

                if self.version == 3.3:
                    # positional args after keyword args
                    rule = ('mkfunc ::= kwargs %s%s %s' %
                            ('pos_arg ' * args_pos, 'LOAD_CONST '*2,
                             opname))
                elif self.version > 3.3:
                    # positional args before keyword args
                    rule = ('mkfunc ::= %skwargs %s %s' %
                            ('pos_arg ' * args_pos, 'LOAD_CONST '*2,
                             opname))
                else:
                    rule = ('mkfunc ::= kwargs %sexpr %s' %
                            ('pos_arg ' * args_pos, opname))
                self.add_unique_rule(rule, opname, token.attr, customize)
                if opname.startswith('MAKE_FUNCTION_A'):
                    if self.version >= 3.6:
                        rule = ('mkfunc_annotate ::= %s%sannotate_tuple LOAD_CONST LOAD_CONST %s' %
                                (('pos_arg ' * (args_pos)),
                                 ('call ' * (annotate_args-1)), opname))
                        self.add_unique_rule(rule, opname, token.attr, customize)
                        rule = ('mkfunc_annotate ::= %s%sannotate_tuple LOAD_CONST LOAD_CONST %s' %
                                (('pos_arg ' * (args_pos)),
                                 ('annotate_arg ' * (annotate_args-1)), opname))
                    if self.version >= 3.3:
                        # Normally we remove EXTENDED_ARG from the opcodes, but in the case of
                        # annotated functions can use the EXTENDED_ARG tuple to signal we have
                        # an annotated function.
                        # Yes this is a little hacky
                        rule = ('mkfunc_annotate ::= %s%sannotate_tuple LOAD_CONST LOAD_CONST EXTENDED_ARG %s' %
                                (('pos_arg ' * (args_pos)),
                                 ('call ' * (annotate_args-1)), opname))
                        self.add_unique_rule(rule, opname, token.attr, customize)
                        rule = ('mkfunc_annotate ::= %s%sannotate_tuple LOAD_CONST LOAD_CONST EXTENDED_ARG %s' %
                                (('pos_arg ' * (args_pos)),
                                 ('annotate_arg ' * (annotate_args-1)), opname))
                    else:
                        # See above comment about use of EXTENDED_ARG
                        rule = ('mkfunc_annotate ::= %s%sannotate_tuple LOAD_CONST EXTENDED_ARG %s' %
                                (('pos_arg ' * (args_pos)),
                                 ('annotate_arg ' * (annotate_args-1)), opname))
                        self.add_unique_rule(rule, opname, token.attr, customize)
                        rule = ('mkfunc_annotate ::= %s%sannotate_tuple LOAD_CONST EXTENDED_ARG %s' %
                                (('pos_arg ' * (args_pos)),
                                 ('call ' * (annotate_args-1)), opname))
                    self.add_unique_rule(rule, opname, token.attr, customize)
            elif opname_base in ('UNPACK_EX',):
                before_count, after_count = token.attr
                rule = 'unpack ::= ' + opname + ' store' * (before_count + after_count + 1)
                self.add_unique_rule(rule, opname, token.attr, customize)
            elif opname_base in ('UNPACK_TUPLE', 'UNPACK_SEQUENCE'):
                rule = 'unpack ::= ' + opname + ' store' * token.attr
                self.add_unique_rule(rule, opname, token.attr, customize)
            elif opname_base == 'UNPACK_LIST':
                rule = 'unpack_list ::= ' + opname + ' store' * token.attr
                # Register the rule; it used to be built and then dropped.
                self.add_unique_rule(rule, opname, token.attr, customize)

        # Nonterminals whose reductions are checked by reduce_is_invalid().
        self.check_reduce['aug_assign1'] = 'AST'
        self.check_reduce['aug_assign2'] = 'AST'
        self.check_reduce['while1stmt'] = 'noAST'
        self.check_reduce['annotate_tuple'] = 'noAST'
        self.check_reduce['kwarg'] = 'noAST'
        # FIXME: remove parser errors caused by the below
        # self.check_reduce['while1elsestmt'] = 'noAST'
        return

    def reduce_is_invalid(self, rule, ast, tokens, first, last):
        """Return True when reducing by "rule" should be rejected.

        rule[0] is the left-hand-side nonterminal; "first" and "last" are
        indexes into "tokens" delimiting the span being reduced.  Only the
        nonterminals tested below are ever vetoed; everything else yields
        False.
        """
        lhs = rule[0]
        if lhs in ('aug_assign1', 'aug_assign2') and ast[0][0] == 'and':
            return True
        elif lhs == 'annotate_tuple':
            return not isinstance(tokens[first].attr, tuple)
        elif lhs == 'kwarg':
            arg = tokens[first].attr
            if PYTHON3:
                return not isinstance(arg, str)
            else:
                return not (isinstance(arg, str) or isinstance(arg, unicode))
        elif lhs == 'while1elsestmt':
            # if SETUP_LOOP target spans the else part, then this is
            # not while1else. Also do for whileTrue?
            last += 1
            # Skip tokens whose offset is a string.
            while isinstance(tokens[last].offset, str):
                last += 1
            return tokens[first].attr == tokens[last].offset
        elif lhs == 'while1stmt':
            if (0 <= last < len(tokens)
                    and tokens[last] in ('COME_FROM_LOOP', 'JUMP_BACK')):
                # jump_back should be right after SETUP_LOOP. Test?
                last += 1
            while last < len(tokens) and isinstance(tokens[last].offset, str):
                last += 1
            if last < len(tokens):
                offset = tokens[last].offset
                assert tokens[first] == 'SETUP_LOOP'
                if offset != tokens[first].attr:
                    return True
            return False
        return False


class Python30Parser(Python3Parser):

    def p_30(self, args):
        """
        # Store locals is only in Python 3.0 to 3.3
        stmt ::= store_locals
        store_locals ::= LOAD_FAST STORE_LOCALS

        jmp_true ::= JUMP_IF_TRUE_OR_POP POP_TOP
        _ifstmts_jump ::= c_stmts_opt JUMP_FORWARD POP_TOP COME_FROM
        """


class Python3ParserSingle(Python3Parser, PythonParserSingle):
    pass


def info(args):
    """Check the grammar of a Python 3 parser and optionally dump it.

    args[0], when it is one of '3.5', '3.3', '3.2' or '3.0', selects the
    version-specific parser; otherwise Python3Parser is used.  The grammar
    is dumped when sys.argv[1] is 'dump'.
    """
    # Imported here so the function also works when this module is imported
    # rather than run as a script.
    import sys

    # NOTE(review): the __main__ block passes sys.argv unchanged, so args[0]
    # is the script name there and never matches a version -- confirm the
    # intended command-line usage.

    # Check grammar
    p = Python3Parser()
    if len(args) > 0:
        arg = args[0]
        # The version-specific parsers are imported from the "parsers"
        # package, the same package this file imports astnode from.
        if arg == '3.5':
            from uncompyle6.parsers.parse35 import Python35Parser
            p = Python35Parser()
        elif arg == '3.3':
            from uncompyle6.parsers.parse33 import Python33Parser
            p = Python33Parser()
        elif arg == '3.2':
            from uncompyle6.parsers.parse32 import Python32Parser
            p = Python32Parser()
        elif arg == '3.0':
            p = Python30Parser()
    p.check_grammar()
    if len(sys.argv) > 1 and sys.argv[1] == 'dump':
        print('-' * 50)
        p.dump_grammar()


if __name__ == '__main__':
    import sys
    info(sys.argv)