# Copyright (c) 2017-2023 Rocky Bernstein """ spark grammar differences over Python2 for Python 2.6. """ from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG from uncompyle6.parser import PythonParserSingle from uncompyle6.parsers.parse2 import Python2Parser from uncompyle6.parsers.reducecheck import ( except_handler, ifelsestmt2, ifstmt2, tryelsestmt, tryexcept, ) class Python26Parser(Python2Parser): def __init__(self, debug_parser=PARSER_DEFAULT_DEBUG): super(Python26Parser, self).__init__(debug_parser) self.customized = {} def p_try_except26(self, args): """ except_stmt ::= except_cond3 except_suite except_cond1 ::= DUP_TOP expr COMPARE_OP JUMP_IF_FALSE POP_TOP POP_TOP POP_TOP POP_TOP except_cond3 ::= DUP_TOP expr COMPARE_OP JUMP_IF_FALSE POP_TOP POP_TOP store POP_TOP except_handler ::= JUMP_FORWARD COME_FROM except_stmts come_froms_pop END_FINALLY come_froms except_handler ::= JUMP_FORWARD COME_FROM except_stmts END_FINALLY except_handler ::= JUMP_FORWARD COME_FROM except_stmts POP_TOP END_FINALLY come_froms except_handler ::= jmp_abs COME_FROM except_stmts POP_TOP END_FINALLY except_handler ::= jmp_abs COME_FROM except_stmts END_FINALLY JUMP_FORWARD # Sometimes we don't put in COME_FROM to the next statement # like we do in 2.7. Perhaps we should? try_except ::= SETUP_EXCEPT suite_stmts_opt POP_BLOCK except_handler tryelsestmt ::= SETUP_EXCEPT suite_stmts_opt POP_BLOCK except_handler else_suite come_froms tryelsestmtl ::= SETUP_EXCEPT suite_stmts_opt POP_BLOCK except_handler else_suitel tryelsestmtc ::= SETUP_EXCEPT suite_stmts_opt POP_BLOCK except_handler else_suitec _ifstmts_jump ::= c_stmts_opt JUMP_FORWARD COME_FROM POP_TOP except_suite ::= c_stmts_opt JUMP_FORWARD come_from_pop except_suite ::= c_stmts_opt jf_pop except_suite ::= c_stmts_opt jmp_abs come_from_pop # This is what happens after a jump where # we start a new block. For reasons that I don't fully # understand, there is also a value on the top of the stack. come_from_pop ::= COME_FROM POP_TOP come_froms_pop ::= come_froms POP_TOP """ # In contrast to Python 2.7, Python 2.6 has a lot of # POP_TOP's which come right after various jumps. # The COME_FROM instructions our scanner adds, here it is to assist # distinguishing the extraneous POP_TOPs from those that start # after one of these jumps def p_jumps26(self, args): """ # There are the equivalents of Python 2.7+'s # POP_JUMP_IF_TRUE and POP_JUMP_IF_FALSE jmp_true ::= JUMP_IF_TRUE POP_TOP jmp_false ::= JUMP_IF_FALSE POP_TOP jb_pop ::= JUMP_BACK POP_TOP jf_pop ::= JUMP_FORWARD POP_TOP jb_cont ::= JUMP_BACK jb_cont ::= CONTINUE jb_cf_pop ::= come_from_opt JUMP_BACK _come_froms POP_TOP ja_cf_pop ::= JUMP_ABSOLUTE come_froms POP_TOP jf_cf_pop ::= JUMP_FORWARD come_froms POP_TOP # The first optional COME_FROM when it appears is really # COME_FROM_LOOP, but in <= 2.6 we don't distinguish # this cf_jb_cf_pop ::= _come_froms JUMP_BACK come_froms POP_TOP pb_come_from ::= POP_BLOCK COME_FROM jb_pb_come_from ::= JUMP_BACK pb_come_from _ifstmts_jump ::= c_stmts_opt JUMP_FORWARD COME_FROM POP_TOP _ifstmts_jump ::= c_stmts_opt JUMP_FORWARD come_froms POP_TOP COME_FROM # This is what happens after a jump where # we start a new block. For reasons that I don't fully # understand, there is also a value on the top of the stack. come_froms_pop ::= come_froms POP_TOP """ def p_stmt26(self, args): """ stmt ::= ifelsestmtr # We use filler as a placeholder to keep nonterminal positions # the same across different grammars so that the same semantic actions # can be used filler ::= assert ::= assert_expr jmp_true LOAD_ASSERT RAISE_VARARGS_1 come_froms_pop assert2 ::= assert_expr jmp_true LOAD_ASSERT expr RAISE_VARARGS_2 come_froms_pop break ::= BREAK_LOOP JUMP_BACK # Semantic actions want else_suitel to be at index 3 ifelsestmtl ::= testexpr c_stmts_opt cf_jb_cf_pop else_suitel ifelsestmtc ::= testexpr c_stmts_opt ja_cf_pop else_suitec # Semantic actions want suite_stmts_opt to be at index 3 with ::= expr setupwith SETUP_FINALLY suite_stmts_opt POP_BLOCK LOAD_CONST COME_FROM WITH_CLEANUP END_FINALLY # Semantic actions want store to be at index 2 withasstmt ::= expr setupwithas store suite_stmts_opt POP_BLOCK LOAD_CONST COME_FROM WITH_CLEANUP END_FINALLY # This is truly weird. 2.7 does this (not including POP_TOP) with # opcode SETUP_WITH setupwith ::= DUP_TOP LOAD_ATTR ROT_TWO LOAD_ATTR CALL_FUNCTION_0 POP_TOP setupwithas ::= DUP_TOP LOAD_ATTR ROT_TWO LOAD_ATTR CALL_FUNCTION_0 setup_finally setup_finally ::= STORE_FAST SETUP_FINALLY LOAD_FAST DELETE_FAST setup_finally ::= STORE_NAME SETUP_FINALLY LOAD_NAME DELETE_NAME while1stmt ::= SETUP_LOOP l_stmts_opt come_from_opt JUMP_BACK _come_froms # Sometimes JUMP_BACK is misclassified as CONTINUE. # workaround until we have better control flow in place while1stmt ::= SETUP_LOOP l_stmts_opt CONTINUE _come_froms whilestmt ::= SETUP_LOOP testexpr l_stmts_opt jb_pop POP_BLOCK _come_froms whilestmt ::= SETUP_LOOP testexpr l_stmts_opt jb_cf_pop pb_come_from whilestmt ::= SETUP_LOOP testexpr l_stmts_opt jb_cf_pop POP_BLOCK whilestmt ::= SETUP_LOOP testexpr returns POP_BLOCK COME_FROM # In the "whilestmt" below, there isn't a COME_FROM when the # "while" is the last thing in the module or function. whilestmt ::= SETUP_LOOP testexpr returns POP_TOP POP_BLOCK whileelsestmt ::= SETUP_LOOP testexpr l_stmts_opt jb_pop POP_BLOCK else_suitel COME_FROM while1elsestmt ::= SETUP_LOOP l_stmts JUMP_BACK else_suitel COME_FROM return ::= return_expr RETURN_END_IF POP_TOP return ::= return_expr RETURN_VALUE POP_TOP return_if_stmt ::= return_expr RETURN_END_IF POP_TOP iflaststmtl ::= testexpr c_stmts_opt jb_cf_pop iflaststmt ::= testexpr c_stmts_opt JUMP_ABSOLUTE come_from_pop lastc_stmt ::= iflaststmt come_froms ifstmt ::= testexpr_then _ifstmts_jump # Semantic actions want the else to be at position 3 ifelsestmt ::= testexpr_then c_stmts_opt jf_cf_pop else_suite come_froms ifelsestmt ::= testexpr_then c_stmts_opt filler else_suitel come_froms POP_TOP ifelsestmt ::= testexpr c_stmts_opt jf_cf_pop else_suite # We have no jumps to jumps, so no "come_froms" but a single "COME_FROM" ifelsestmt ::= testexpr c_stmts_opt jf_cf_pop else_suite COME_FROM # Semantic actions want else_suitel to be at index 3 ifelsestmtl ::= testexpr_then c_stmts_opt jb_cf_pop else_suitel ifelsestmtc ::= testexpr_then c_stmts_opt ja_cf_pop else_suitec iflaststmt ::= testexpr_then c_stmts_opt JUMP_ABSOLUTE come_froms POP_TOP iflaststmt ::= testexpr c_stmts_opt JUMP_ABSOLUTE come_froms POP_TOP # "if"/"else" statement that ends in a RETURN ifelsestmtr ::= testexpr_then return_if_stmts returns testexpr_then ::= testtrue_then testexpr_then ::= testfalse_then testtrue_then ::= expr jmp_true_then testfalse_then ::= expr jmp_false_then jmp_false_then ::= JUMP_IF_FALSE THEN POP_TOP jmp_true_then ::= JUMP_IF_TRUE THEN POP_TOP # In the "while1stmt" below, there sometimes isn't a # "COME_FROM" when the "while1" is the last thing in the # module or function. while1stmt ::= SETUP_LOOP returns come_from_opt for_block ::= returns _come_froms """ def p_comp26(self, args): """ list_for ::= expr for_iter store list_iter JUMP_BACK come_froms POP_TOP # The JUMP FORWARD below jumps to the JUMP BACK. It seems to happen # in rare cases that may have to with length of code # FIXME: we can add a reduction check for this list_for ::= expr for_iter store list_iter JUMP_FORWARD come_froms POP_TOP COME_FROM JUMP_BACK list_for ::= expr for_iter store list_iter jb_cont # This is for a really funky: # [ x for x in range(10) if x % 2 if x % 3 ] # the JUMP_ABSOLUTE is to the instruction after the last POP_TOP # we have a reduction check for this list_for ::= expr for_iter store list_iter JUMP_ABSOLUTE come_froms POP_TOP jb_pop list_iter ::= list_if JUMP_BACK list_iter ::= list_if JUMP_BACK COME_FROM POP_TOP list_comp ::= BUILD_LIST_0 DUP_TOP store list_iter delete list_comp ::= BUILD_LIST_0 DUP_TOP store list_iter JUMP_BACK delete lc_body ::= LOAD_NAME expr LIST_APPEND lc_body ::= LOAD_FAST expr LIST_APPEND comp_for ::= SETUP_LOOP expr for_iter store comp_iter jb_pb_come_from comp_iter ::= comp_if_not comp_if_not ::= expr jmp_true comp_iter comp_body ::= gen_comp_body for_block ::= l_stmts_opt _come_froms POP_TOP JUMP_BACK # Make sure we keep indices the same as 2.7 setup_loop_lf ::= SETUP_LOOP LOAD_FAST genexpr_func ::= setup_loop_lf FOR_ITER store comp_iter jb_pb_come_from genexpr_func ::= setup_loop_lf FOR_ITER store comp_iter JUMP_BACK come_from_pop jb_pb_come_from # This is for a really funky: # (x for x in range(10) if x % 2 if x % 3 ) # the JUMP_ABSOLUTE is to the instruction after the last POP_TOP. # Add a reduction check for this? genexpr_func ::= setup_loop_lf FOR_ITER store comp_iter JUMP_ABSOLUTE come_froms POP_TOP jb_pop jb_pb_come_from genexpr_func ::= setup_loop_lf FOR_ITER store comp_iter JUMP_BACK come_froms POP_TOP jb_pb_come_from generator_exp ::= LOAD_GENEXPR MAKE_FUNCTION_0 expr GET_ITER CALL_FUNCTION_1 COME_FROM list_if ::= expr jmp_false_then list_iter """ def p_ret26(self, args): """ ret_and ::= expr jmp_false return_expr_or_cond COME_FROM ret_or ::= expr jmp_true return_expr_or_cond COME_FROM if_exp_ret ::= expr jmp_false_then expr RETURN_END_IF POP_TOP return_expr_or_cond if_exp_ret ::= expr jmp_false_then expr return_expr_or_cond return_if_stmt ::= return_expr RETURN_END_IF POP_TOP return ::= return_expr RETURN_VALUE POP_TOP # FIXME: split into Python 2.5 ret_or ::= expr jmp_true return_expr_or_cond come_froms """ def p_except26(self, args): """ except_suite ::= c_stmts_opt jmp_abs POP_TOP """ def p_misc26(self, args): """ dict ::= BUILD_MAP kvlist kvlist ::= kvlist kv3 # Note: preserve positions 0 2 and 4 for semantic actions if_exp_not ::= expr jmp_true expr jf_cf_pop expr COME_FROM if_exp ::= expr jmp_false expr jf_cf_pop expr come_from_opt if_exp ::= expr jmp_false expr ja_cf_pop expr expr ::= if_exp_not and ::= expr JUMP_IF_FALSE POP_TOP expr JUMP_IF_FALSE POP_TOP # A "compare_chained" is two comparisons like x <= y <= z compare_chained ::= expr compared_chained_middle ROT_TWO COME_FROM POP_TOP _come_froms compared_chained_middle ::= expr DUP_TOP ROT_THREE COMPARE_OP jmp_false compared_chained_middle _come_froms compared_chained_middle ::= expr DUP_TOP ROT_THREE COMPARE_OP jmp_false compare_chained_right _come_froms compared_chained_middle ::= expr DUP_TOP ROT_THREE COMPARE_OP jmp_false_then compared_chained_middle _come_froms compared_chained_middle ::= expr DUP_TOP ROT_THREE COMPARE_OP jmp_false_then compare_chained_right _come_froms compare_chained_right ::= expr COMPARE_OP return_expr_lambda compare_chained_right ::= expr COMPARE_OP RETURN_END_IF_LAMBDA compare_chained_right ::= expr COMPARE_OP RETURN_END_IF COME_FROM return_if_lambda ::= RETURN_END_IF_LAMBDA POP_TOP stmt ::= if_exp_lambda stmt ::= if_exp_not_lambda if_exp_lambda ::= expr jmp_false_then expr return_if_lambda return_stmt_lambda LAMBDA_MARKER if_exp_not_lambda ::= expr jmp_true_then expr return_if_lambda return_stmt_lambda LAMBDA_MARKER # if_exp_true are for conditions which always evaluate true # There is dead or non-optional remnants of the condition code though, # and we use that to match on to reconstruct the source more accurately expr ::= if_exp_true if_exp_true ::= expr jf_pop expr COME_FROM # This comes from # 0 or max(5, 3) if 0 else 3 # where there seems to be an additional COME_FROM at the # end. Not sure if this is appropriately named or # is the best way to handle expr ::= if_exp_false if_exp_false ::= if_exp COME_FROM """ def customize_grammar_rules(self, tokens, customize): self.remove_rules( """ withasstmt ::= expr SETUP_WITH store suite_stmts_opt POP_BLOCK LOAD_CONST COME_FROM_WITH WITH_CLEANUP END_FINALLY """ ) super(Python26Parser, self).customize_grammar_rules(tokens, customize) self.reduce_check_table = { "except_handler": except_handler, "ifstmt": ifstmt2, "ifelsestmt": ifelsestmt2, "tryelsestmt": tryelsestmt, "try_except": tryexcept, "tryelsestmtl": tryelsestmt, } self.check_reduce["and"] = "AST" self.check_reduce["assert_expr_and"] = "AST" self.check_reduce["except_handler"] = "tokens" self.check_reduce["ifstmt"] = "AST" self.check_reduce["ifelsestmt"] = "AST" self.check_reduce["forelselaststmtl"] = "tokens" self.check_reduce["forelsestmt"] = "tokens" self.check_reduce["list_for"] = "AST" self.check_reduce["try_except"] = "AST" self.check_reduce["tryelsestmt"] = "AST" self.check_reduce["tryelsestmtl"] = "AST" def reduce_is_invalid(self, rule, ast, tokens, first, last): invalid = super(Python26Parser, self).reduce_is_invalid( rule, ast, tokens, first, last ) lhs = rule[0] if invalid or tokens is None: return invalid if rule in ( ("and", ("expr", "jmp_false", "expr", "\\e_come_from_opt")), ("and", ("expr", "jmp_false", "expr", "come_from_opt")), ("assert_expr_and", ("assert_expr", "jmp_false", "expr")), ): # FIXME: workaround profiling bug if ast[1] is None: return False # For now, we won't let the 2nd 'expr' be an "if_exp_not" # However in < 2.6 where we don't have if/else expression it *can* # be. if self.version >= (2, 6) and ast[2][0] == "if_exp_not": return True test_index = last while tokens[test_index].kind == "COME_FROM": test_index += 1 if tokens[test_index].kind.startswith("JUMP_IF"): return False # Test that jmp_false jumps to the end of "and" # or that it jumps to the same place as the end of "and" jmp_false = ast[1][0] jmp_target = jmp_false.offset + jmp_false.attr + 3 return not ( jmp_target == tokens[test_index].offset or tokens[last].pattr == jmp_false.pattr ) elif lhs in ("forelselaststmtl", "forelsestmt"): # print("XXX", first, last) # for t in range(first, last): # print(tokens[t]) # print("=" * 30) # If the SETUP_LOOP jumps to the tokens[last] then # this is a "for" not a "for else". # However, in Python 2.2 and before there is a SET_LINENO # instruction which might have gotten removed. So we need # to account for that. bytecode-1.4/anydbm.pyc exhibits # this behavior. # Also we need to use the setup_loop instruction (not opcode) # since the operand can be a relative offset rather than # an absolute offset. setup_inst = self.insts[self.offset2inst_index[tokens[first].offset]] last = min(len(tokens) - 1, last) if self.version <= (2, 2) and tokens[last] == "COME_FROM": last += 1 return tokens[last - 1].off2int() > setup_inst.argval elif rule == ("ifstmt", ("testexpr", "_ifstmts_jump")): for i in range(last - 1, last - 4, -1): t = tokens[i] if t == "JUMP_FORWARD": return t.attr > tokens[min(last, len(tokens) - 1)].off2int() elif t not in ("POP_TOP", "COME_FROM"): break pass pass elif rule == ( "list_for", ( "expr", "for_iter", "store", "list_iter", "JUMP_ABSOLUTE", "come_froms", "POP_TOP", "jb_pop", ), ): # The JUMP_ABSOLUTE has to be to the last POP_TOP or this is invalid ja_attr = ast[4].attr return tokens[last].offset != ja_attr elif lhs == "try_except": # We need to distinguish "try_except" from "tryelsestmt"; we do that # by checking the jump before the END_FINALLY # If we have: # insn # POP_TOP # END_FINALLY # COME_FROM # then insn has to be either a JUMP_FORWARD or a RETURN_VALUE # and if it is JUMP_FORWARD, then it has to be a JUMP_FORWARD to right after # COME_FROM if last == len(tokens): last -= 1 if tokens[last] != "COME_FROM" and tokens[last - 1] == "COME_FROM": last -= 1 if ( tokens[last] == "COME_FROM" and tokens[last - 1] == "END_FINALLY" and tokens[last - 2] == "POP_TOP" ): # A jump of 2 is a jump around POP_TOP, END_FINALLY which # would indicate try/else rather than try return tokens[last - 3].kind not in frozenset( ("JUMP_FORWARD", "RETURN_VALUE") ) or (tokens[last - 3] == "JUMP_FORWARD" and tokens[last - 3].attr != 2) elif lhs == "tryelsestmt": # We need to distinguish "try_except" from "tryelsestmt"; we do that # by making sure that the jump before the except handler jumps to # code somewhere before the end of the construct. # This AST method is slower, but the token-only based approach # didn't work as it failed with a "try" embedded inside a "try/else" # since we can't detect COME_FROM boundaries. if ast[3] == "except_handler": except_handler = ast[3] if except_handler[0] == "JUMP_FORWARD": else_start = int(except_handler[0].pattr) if last == len(tokens): last -= 1 if tokens[last] == "COME_FROM" and isinstance: last_offset = int(tokens[last].offset.split("_")[0]) return else_start >= last_offset # The above test apparently isn't good enough, so we have additional # checks distinguish "try_except" from "tryelsestmt". we do that # by checking the jump before the "END_FINALLY". # If we have: # insn # POP_TOP # END_FINALLY # COME_FROM # then insn is neither a JUMP_FORWARD nor RETURN_VALUE, # or if it is JUMP_FORWARD, then it can't be a JUMP_FORWARD to right after # COME_FROM if last == len(tokens): last -= 1 while tokens[last - 1] == "COME_FROM" and tokens[last - 2] == "COME_FROM": last -= 1 if tokens[last] == "COME_FROM" and tokens[last - 1] == "COME_FROM": last -= 1 if ( tokens[last] == "COME_FROM" and tokens[last - 1] == "END_FINALLY" and tokens[last - 2] == "POP_TOP" ): # A jump of 2 is a jump around POP_TOP, END_FINALLY which # would indicate try/else rather than try return tokens[last - 3].kind in frozenset( ("JUMP_FORWARD", "RETURN_VALUE") ) and (tokens[last - 3] != "JUMP_FORWARD" or tokens[last - 3].attr == 2) return False class Python26ParserSingle(Python2Parser, PythonParserSingle): pass if __name__ == "__main__": # Check grammar p = Python26Parser() p.check_grammar() from xdis.version_info import IS_PYPY, PYTHON_VERSION_TRIPLE if PYTHON_VERSION_TRIPLE[:2] == (2, 6): lhs, rhs, tokens, right_recursive, dup_rhs = p.check_sets() from uncompyle6.scanner import get_scanner s = get_scanner(PYTHON_VERSION_TRIPLE, IS_PYPY) opcode_set = set(s.opc.opname).union( set( """JUMP_BACK CONTINUE RETURN_END_IF COME_FROM LOAD_GENEXPR LOAD_ASSERT LOAD_SETCOMP LOAD_DICTCOMP LAMBDA_MARKER RETURN_LAST """.split() ) ) remain_tokens = set(tokens) - opcode_set import re remain_tokens = set([re.sub(r"_\d+$", "", t) for t in remain_tokens]) remain_tokens = set([re.sub("_CONT$", "", t) for t in remain_tokens]) remain_tokens = set(remain_tokens) - opcode_set print(remain_tokens) # print(sorted(p.rule2name.items()))