#  Copyright (c) 2016-2017, 2019, 2021, 2023-2024
#  Rocky Bernstein
"""
spark grammar differences over Python 3.4 for Python 3.5.
"""
from __future__ import print_function

from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG

from uncompyle6.parser import PythonParserSingle, nop_func
from uncompyle6.parsers.parse34 import Python34Parser


class Python35Parser(Python34Parser):
    """Parser for Python 3.5, expressed as changes on top of ``Python34Parser``.

    Static grammar additions live in the docstring of ``p_35on``; rules that
    depend on the instructions actually seen are added in
    ``customize_grammar_rules`` and ``custom_classfunc_rule``.
    """

    def __init__(self, debug_parser=PARSER_DEFAULT_DEBUG):
        """Initialise the 3.4 base parser and start with an empty
        ``customized`` dictionary.

        :param debug_parser: debug settings forwarded unchanged to the
                             parent constructor.
        """
        super(Python35Parser, self).__init__(debug_parser)
        self.customized = {}

    # NOTE(review): the docstring of p_35on is grammar text, not prose
    # documentation -- presumably it is collected by the spark parser base
    # class.  Keep every "#" comment on a line of its own so that it cannot
    # be confused with the rule that follows it.
    def p_35on(self, args):
        """
        # FIXME! isolate this to only loops!
        _ifstmts_jump  ::= c_stmts_opt come_froms
        ifelsestmt ::= testexpr c_stmts_opt jump_forward_else else_suite _come_froms

        pb_ja ::= POP_BLOCK JUMP_ABSOLUTE

        # The number of canned instructions in new statements is mind boggling.
        # I'm sure by the time Python 4 comes around these will be turned
        # into special opcodes

        while1stmt     ::= SETUP_LOOP l_stmts COME_FROM JUMP_BACK
                           POP_BLOCK COME_FROM_LOOP
        while1stmt     ::= SETUP_LOOP l_stmts POP_BLOCK COME_FROM_LOOP
        while1elsestmt ::= SETUP_LOOP l_stmts JUMP_BACK
                           POP_BLOCK else_suite COME_FROM_LOOP

        # The following rule is for Python 3.5+ where we can have stuff like
        # while ..
        #     if
        #     ...
        # the end of the if will jump back to the loop and there will be a COME_FROM
        # after the jump
        l_stmts ::= lastl_stmt come_froms l_stmts

        # Python 3.5+ Await statement
        expr       ::= await_expr
        await_expr ::= expr GET_AWAITABLE LOAD_CONST YIELD_FROM

        stmt       ::= await_stmt
        await_stmt ::= await_expr POP_TOP

        # Python 3.5+ has WITH_CLEANUP_START/FINISH

        with       ::= expr
                       SETUP_WITH POP_TOP suite_stmts_opt
                       POP_BLOCK LOAD_CONST COME_FROM_WITH
                       WITH_CLEANUP_START WITH_CLEANUP_FINISH END_FINALLY

        with_as    ::= expr
                       SETUP_WITH store suite_stmts_opt
                       POP_BLOCK LOAD_CONST COME_FROM_WITH
                       WITH_CLEANUP_START WITH_CLEANUP_FINISH END_FINALLY

        # Python 3.5+ async additions

        stmt               ::= async_for_stmt
        async_for_stmt     ::= SETUP_LOOP expr
                               GET_AITER
                               LOAD_CONST YIELD_FROM SETUP_EXCEPT GET_ANEXT LOAD_CONST
                               YIELD_FROM
                               store
                               POP_BLOCK jump_except COME_FROM_EXCEPT DUP_TOP
                               LOAD_GLOBAL COMPARE_OP POP_JUMP_IF_FALSE
                               POP_TOP POP_TOP POP_TOP POP_EXCEPT POP_BLOCK
                               JUMP_ABSOLUTE END_FINALLY COME_FROM
                               for_block POP_BLOCK JUMP_ABSOLUTE
                               COME_FROM_LOOP

        async_for_stmt     ::= SETUP_LOOP expr
                               GET_AITER
                               LOAD_CONST YIELD_FROM SETUP_EXCEPT GET_ANEXT LOAD_CONST
                               YIELD_FROM
                               store
                               POP_BLOCK jump_except COME_FROM_EXCEPT DUP_TOP
                               LOAD_GLOBAL COMPARE_OP POP_JUMP_IF_FALSE
                               POP_TOP POP_TOP POP_TOP POP_EXCEPT POP_BLOCK
                               JUMP_ABSOLUTE END_FINALLY JUMP_BACK
                               pass POP_BLOCK JUMP_ABSOLUTE
                               COME_FROM_LOOP

        stmt               ::= async_forelse_stmt
        async_forelse_stmt ::= SETUP_LOOP expr
                               GET_AITER
                               LOAD_CONST YIELD_FROM SETUP_EXCEPT GET_ANEXT LOAD_CONST
                               YIELD_FROM
                               store
                               POP_BLOCK JUMP_FORWARD COME_FROM_EXCEPT DUP_TOP
                               LOAD_GLOBAL COMPARE_OP POP_JUMP_IF_FALSE
                               POP_TOP POP_TOP POP_TOP POP_EXCEPT POP_BLOCK
                               JUMP_ABSOLUTE END_FINALLY COME_FROM
                               for_block pb_ja
                               else_suite COME_FROM_LOOP

        inplace_op       ::= INPLACE_MATRIX_MULTIPLY
        binary_operator  ::= BINARY_MATRIX_MULTIPLY

        # Python 3.5+ does jump optimization
        # In <.3.5 the below is a JUMP_FORWARD to a JUMP_ABSOLUTE.

        return_if_lambda ::= RETURN_END_IF_LAMBDA COME_FROM
        return           ::= return_expr RETURN_END_IF

        jb_else     ::= JUMP_BACK ELSE
        ifelsestmtc ::= testexpr c_stmts_opt JUMP_FORWARD else_suitec
        ifelsestmtl ::= testexpr c_stmts_opt jb_else else_suitel

        # 3.5 Has jump optimization which can route the end of an
        # "if/then" back to a loop just before an else.
        jump_absolute_else ::= jb_else
        jump_absolute_else ::= CONTINUE ELSE

        # Our hacky "ELSE" determination doesn't do a good job and really
        # determine the start of an "else". It could also be the end of an
        # "if-then" which ends in a "continue". Perhaps with real control-flow
        # analysis we'll sort this out. Or call "ELSE" something more appropriate.
        _ifstmts_jump ::= c_stmts_opt ELSE

        # ifstmt ::= testexpr c_stmts_opt

        iflaststmt ::= testexpr c_stmts_opt JUMP_FORWARD

        # Python 3.3+ also has yield from. 3.5 does it
        # differently than 3.3, 3.4

        yield_from ::= expr GET_YIELD_FROM_ITER LOAD_CONST YIELD_FROM
        """

    def customize_grammar_rules(self, tokens, customize):
        """Tailor the grammar to the instructions found in ``tokens``.

        The method works in three steps:

        1. remove the inherited ``yield_from``, ``with`` and ``with_as``
           rules listed below (the ``GET_ITER`` / ``WITH_CLEANUP`` forms);
        2. run the parent class customization;
        3. walk ``tokens`` and add rules for ``LOAD_ASSERT`` (only when
           ``"PyPy"`` is in ``customize``), ``BUILD_MAP_UNPACK_WITH_CALL``,
           ``BEFORE_ASYNC_WITH``, ``BUILD_MAP_UNPACK`` and ``SETUP_WITH``.

        :param tokens: indexable sequence of token objects; ``.kind`` is read
                       for every token and ``.attr`` for
                       ``BUILD_MAP_UNPACK_WITH_CALL``.
        :param customize: customization mapping/collection passed through to
                          the parent class and to ``add_unique_doc_rules``.
        """
        self.remove_rules(
            """
          yield_from ::= expr GET_ITER LOAD_CONST YIELD_FROM
          yield_from ::= expr expr YIELD_FROM
          with       ::= expr SETUP_WITH POP_TOP suite_stmts_opt
                         POP_BLOCK LOAD_CONST COME_FROM_WITH
                         WITH_CLEANUP END_FINALLY
          with_as    ::= expr SETUP_WITH store suite_stmts_opt
                         POP_BLOCK LOAD_CONST COME_FROM_WITH
                         WITH_CLEANUP END_FINALLY
        """
        )
        super(Python35Parser, self).customize_grammar_rules(tokens, customize)
        for i, token in enumerate(tokens):
            opname = token.kind
            if opname == "LOAD_ASSERT":
                if "PyPy" in customize:
                    rules_str = """
                    stmt ::= JUMP_IF_NOT_DEBUG stmts COME_FROM
                    """
                    self.add_unique_doc_rules(rules_str, customize)
            # FIXME: I suspect this is wrong for 3.6 and 3.5, but
            # I haven't verified what the 3.7ish fix is
            elif opname == "BUILD_MAP_UNPACK_WITH_CALL":
                if self.version < (3, 7):
                    self.addRule("expr ::= unmapexpr", nop_func)
                    # The low byte of the operand is used as the number of
                    # "expr" operands that make up the unpacked mapping.
                    nargs = token.attr % 256
                    map_unpack_n = "map_unpack_%s" % nargs
                    rule = map_unpack_n + " ::= " + "expr " * (nargs)
                    self.addRule(rule, nop_func)
                    rule = "unmapexpr ::= %s %s" % (map_unpack_n, opname)
                    self.addRule(rule, nop_func)
                    # The call rule is keyed on whatever instruction follows
                    # BUILD_MAP_UNPACK_WITH_CALL in the token stream.
                    call_token = tokens[i + 1]
                    rule = "call ::= expr unmapexpr " + call_token.kind
                    self.addRule(rule, nop_func)
            elif opname == "BEFORE_ASYNC_WITH" and self.version < (3, 8):
                # Some Python 3.5+ async additions
                rules_str = """
                   stmt               ::= async_with_stmt
                   async_with_pre     ::= BEFORE_ASYNC_WITH GET_AWAITABLE LOAD_CONST YIELD_FROM SETUP_ASYNC_WITH
                   async_with_post    ::= COME_FROM_ASYNC_WITH
                                          WITH_CLEANUP_START GET_AWAITABLE LOAD_CONST YIELD_FROM
                                          WITH_CLEANUP_FINISH END_FINALLY

                   async_with_stmt    ::= expr
                                          async_with_pre
                                          POP_TOP
                                          suite_stmts_opt
                                          POP_BLOCK LOAD_CONST
                                          async_with_post
                   async_with_stmt    ::= expr
                                          async_with_pre
                                          POP_TOP
                                          suite_stmts_opt
                                          async_with_post

                   stmt               ::= async_with_as_stmt

                   async_with_as_stmt ::= expr
                                          async_with_pre
                                          store
                                          suite_stmts_opt
                                          POP_BLOCK LOAD_CONST
                                          async_with_post
                """
                self.addRule(rules_str, nop_func)
            elif opname == "BUILD_MAP_UNPACK":
                self.addRule(
                    """
                   expr        ::= dict_unpack
                   dict_unpack ::= dict_comp BUILD_MAP_UNPACK
                   """,
                    nop_func,
                )

            elif opname == "SETUP_WITH":
                # Python 3.5+ has WITH_CLEANUP_START/FINISH
                rules_str = """
                  with       ::= expr
                                 SETUP_WITH POP_TOP suite_stmts_opt
                                 POP_BLOCK LOAD_CONST COME_FROM_WITH
                                 WITH_CLEANUP_START WITH_CLEANUP_FINISH END_FINALLY

                  with_as    ::= expr
                                 SETUP_WITH store suite_stmts_opt
                                 POP_BLOCK LOAD_CONST COME_FROM_WITH
                                 WITH_CLEANUP_START WITH_CLEANUP_FINISH END_FINALLY
                """
                self.addRule(rules_str, nop_func)
            pass
        return

    def custom_classfunc_rule(self, opname, token, customize, *args):
        """Add ``call`` (and, when applicable, ``async_call``) rules for one
        ``CALL_FUNCTION*`` instruction.

        ``CALL_FUNCTION_VAR*`` opcodes get a 3.5-specific ``call`` rule built
        here; every other opcode is delegated to the parent implementation.

        :param opname: name of the call opcode being customized.
        :param token: the token for that instruction; its ``kind`` is
                      rewritten via ``call_fn_name`` for
                      ``CALL_FUNCTION_VAR*`` opcodes.
        :param customize: customization object forwarded to
                          ``add_unique_rule`` and to the parent class.
        :param args: extra positional arguments forwarded to the parent
                     implementation.
        """
        args_pos, args_kw = self.get_pos_kw(token)

        # Additional exprs for * and ** args:
        #  0 if neither
        #  1 for CALL_FUNCTION_VAR or CALL_FUNCTION_KW
        #  2 for * and ** args (CALL_FUNCTION_VAR_KW).
        # Yes, this computation based on instruction name is a little bit hokey.
        nak = (len(opname) - len("CALL_FUNCTION")) // 3
        uniq_param = args_kw + args_pos

        # Only offer the "await f(...)" form when both opcodes it needs have
        # been seen.
        if frozenset(("GET_AWAITABLE", "YIELD_FROM")).issubset(self.seen_ops):
            rule = (
                "async_call ::= expr "
                + ("pos_arg " * args_pos)
                + ("kwarg " * args_kw)
                + "expr " * nak
                + token.kind
                + " GET_AWAITABLE LOAD_CONST YIELD_FROM"
            )
            self.add_unique_rule(rule, token.kind, uniq_param, customize)
            self.add_unique_rule(
                "expr ::= async_call", token.kind, uniq_param, customize
            )

        if opname.startswith("CALL_FUNCTION_VAR"):
            # Python 3.5 changes the stack position of *args. KW args come
            # after *args.

            # Note: Python 3.6+ replaces CALL_FUNCTION_VAR and
            # CALL_FUNCTION_VAR_KW with CALL_FUNCTION_EX

            token.kind = self.call_fn_name(token)
            if opname.endswith("KW"):
                kw = "expr "
            else:
                kw = ""
            rule = (
                "call ::= expr expr "
                + ("pos_arg " * args_pos)
                + ("kwarg " * args_kw)
                + kw
                + token.kind
            )

            # Note: semantic actions make use of the fact of whether "args_pos"
            # zero or not in creating a template rule.
            self.add_unique_rule(rule, token.kind, args_pos, customize)
        else:
            super(Python35Parser, self).custom_classfunc_rule(
                opname, token, customize, *args
            )


class Python35ParserSingle(Python35Parser, PythonParserSingle):
    """Python 3.5 grammar combined with ``PythonParserSingle``."""

    pass


if __name__ == "__main__":
    # Check grammar
    p = Python35Parser()
    p.check_grammar()
    from xdis.version_info import IS_PYPY, PYTHON_VERSION_TRIPLE

    # The token cross-check below is only run when the interpreter running
    # this script is itself Python 3.5.
    if PYTHON_VERSION_TRIPLE[:2] == (3, 5):
        lhs, rhs, tokens, right_recursive, dup_rhs = p.check_sets()
        from uncompyle6.scanner import get_scanner

        s = get_scanner(PYTHON_VERSION_TRIPLE, IS_PYPY)
        # Real opcode names plus the extra token names listed here.
        opcode_set = set(s.opc.opname).union(
            set(
                """JUMP_BACK CONTINUE RETURN_END_IF COME_FROM
               LOAD_GENEXPR LOAD_ASSERT LOAD_SETCOMP LOAD_DICTCOMP LOAD_CLASSNAME
               LAMBDA_MARKER RETURN_LAST
            """.split()
            )
        )
        remain_tokens = set(tokens) - opcode_set
        import re

        # Strip a trailing "_<digits>" and a trailing "_CONT" before
        # comparing against the opcode names again.
        remain_tokens = set([re.sub(r"_\d+$", "", t) for t in remain_tokens])
        remain_tokens = set([re.sub("_CONT$", "", t) for t in remain_tokens])
        remain_tokens = set(remain_tokens) - opcode_set
        print(remain_tokens)
        # print(sorted(p.rule2name.items()))