From 71b1446c9cc85156671e97d20f26bc786771f40f Mon Sep 17 00:00:00 2001 From: rocky Date: Tue, 23 Aug 2022 21:36:55 -0400 Subject: [PATCH] Push reducechecks from 2.7 into 2.6 Some adaption is needed. These rules may also help earlier 2.x Python as well --- uncompyle6/parsers/parse22.py | 4 + uncompyle6/parsers/parse26.py | 158 +++++++++++------- uncompyle6/parsers/reducecheck/__init__.py | 1 + uncompyle6/parsers/reducecheck/ifelsestmt2.py | 3 + uncompyle6/parsers/reducecheck/ifstmt2.py | 90 ++++++++++ 5 files changed, 193 insertions(+), 63 deletions(-) create mode 100644 uncompyle6/parsers/reducecheck/ifstmt2.py diff --git a/uncompyle6/parsers/parse22.py b/uncompyle6/parsers/parse22.py index 108a3c31..8c4f34ae 100644 --- a/uncompyle6/parsers/parse22.py +++ b/uncompyle6/parsers/parse22.py @@ -29,6 +29,10 @@ class Python22Parser(Python23Parser): self.remove_rules(""" kvlist ::= kvlist kv2 """) + if self.version[:2] <= (2, 2): + # TODO: We may add something different or customize something + del self.reduce_check_table["ifstmt"] + class Python22ParserSingle(Python23Parser, PythonParserSingle): pass diff --git a/uncompyle6/parsers/parse26.py b/uncompyle6/parsers/parse26.py index 269eee14..679949b0 100644 --- a/uncompyle6/parsers/parse26.py +++ b/uncompyle6/parsers/parse26.py @@ -6,10 +6,16 @@ spark grammar differences over Python2 for Python 2.6. from uncompyle6.parser import PythonParserSingle from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG from uncompyle6.parsers.parse2 import Python2Parser -from uncompyle6.parsers.reducecheck import (except_handler, ifelsestmt2, tryexcept, tryelsestmt) +from uncompyle6.parsers.reducecheck import ( + except_handler, + ifelsestmt2, + ifstmt2, + tryexcept, + tryelsestmt, +) + class Python26Parser(Python2Parser): - def __init__(self, debug_parser=PARSER_DEFAULT_DEBUG): super(Python26Parser, self).__init__(debug_parser) self.customized = {} @@ -207,7 +213,7 @@ class Python26Parser(Python2Parser): """ def p_comp26(self, args): - ''' + """ list_for ::= expr for_iter store list_iter JUMP_BACK come_froms POP_TOP # The JUMP FORWARD below jumps to the JUMP BACK. It seems to happen @@ -232,9 +238,9 @@ class Python26Parser(Python2Parser): list_comp ::= BUILD_LIST_0 DUP_TOP store list_iter delete list_comp ::= BUILD_LIST_0 DUP_TOP - store list_iter JUMP_BACK delete + store list_iter JUMP_BACK delete lc_body ::= LOAD_NAME expr LIST_APPEND - lc_body ::= LOAD_FAST expr LIST_APPEND + lc_body ::= LOAD_FAST expr LIST_APPEND comp_for ::= SETUP_LOOP expr for_iter store comp_iter jb_pb_come_from @@ -265,10 +271,10 @@ class Python26Parser(Python2Parser): generator_exp ::= LOAD_GENEXPR MAKE_FUNCTION_0 expr GET_ITER CALL_FUNCTION_1 COME_FROM list_if ::= expr jmp_false_then list_iter - ''' + """ def p_ret26(self, args): - ''' + """ ret_and ::= expr jmp_false return_expr_or_cond COME_FROM ret_or ::= expr jmp_true return_expr_or_cond COME_FROM if_exp_ret ::= expr jmp_false_then expr RETURN_END_IF POP_TOP return_expr_or_cond @@ -279,7 +285,7 @@ class Python26Parser(Python2Parser): # FIXME: split into Python 2.5 ret_or ::= expr jmp_true return_expr_or_cond come_froms - ''' + """ def p_except26(self, args): """ @@ -342,45 +348,47 @@ class Python26Parser(Python2Parser): """ def customize_grammar_rules(self, tokens, customize): - self.remove_rules(""" + self.remove_rules( + """ withasstmt ::= expr SETUP_WITH store suite_stmts_opt POP_BLOCK LOAD_CONST COME_FROM_WITH WITH_CLEANUP END_FINALLY - """) + """ + ) super(Python26Parser, self).customize_grammar_rules(tokens, customize) self.reduce_check_table = { "except_handler": except_handler, + "ifstmt": ifstmt2, "ifelsestmt": ifelsestmt2, "tryelsestmt": tryelsestmt, "try_except": tryexcept, "tryelsestmtl": tryelsestmt, } - - self.check_reduce['and'] = 'AST' - self.check_reduce['assert_expr_and'] = 'AST' + self.check_reduce["and"] = "AST" + self.check_reduce["assert_expr_and"] = "AST" self.check_reduce["except_handler"] = "tokens" - self.check_reduce["ifstmt"] = "tokens" + self.check_reduce["ifstmt"] = "AST" self.check_reduce["ifelsestmt"] = "AST" self.check_reduce["forelselaststmtl"] = "tokens" self.check_reduce["forelsestmt"] = "tokens" - self.check_reduce['list_for'] = 'AST' - self.check_reduce['try_except'] = 'AST' - self.check_reduce['tryelsestmt'] = 'AST' - self.check_reduce['tryelsestmtl'] = 'AST' + self.check_reduce["list_for"] = "AST" + self.check_reduce["try_except"] = "AST" + self.check_reduce["tryelsestmt"] = "AST" + self.check_reduce["tryelsestmtl"] = "AST" def reduce_is_invalid(self, rule, ast, tokens, first, last): - invalid = super(Python26Parser, - self).reduce_is_invalid(rule, ast, - tokens, first, last) + invalid = super(Python26Parser, self).reduce_is_invalid( + rule, ast, tokens, first, last + ) lhs = rule[0] if invalid or tokens is None: return invalid if rule in ( - ('and', ('expr', 'jmp_false', 'expr', '\\e_come_from_opt')), - ('and', ('expr', 'jmp_false', 'expr', 'come_from_opt')), - ('assert_expr_and', ('assert_expr', 'jmp_false', 'expr')) - ): + ("and", ("expr", "jmp_false", "expr", "\\e_come_from_opt")), + ("and", ("expr", "jmp_false", "expr", "come_from_opt")), + ("assert_expr_and", ("assert_expr", "jmp_false", "expr")), + ): # FIXME: workaround profiling bug if ast[1] is None: @@ -393,9 +401,9 @@ class Python26Parser(Python2Parser): return True test_index = last - while tokens[test_index].kind == 'COME_FROM': + while tokens[test_index].kind == "COME_FROM": test_index += 1 - if tokens[test_index].kind.startswith('JUMP_IF'): + if tokens[test_index].kind.startswith("JUMP_IF"): return False # Test that jmp_false jumps to the end of "and" @@ -403,8 +411,10 @@ class Python26Parser(Python2Parser): jmp_false = ast[1][0] jmp_target = jmp_false.offset + jmp_false.attr + 3 - return not (jmp_target == tokens[test_index].offset or - tokens[last].pattr == jmp_false.pattr) + return not ( + jmp_target == tokens[test_index].offset + or tokens[last].pattr == jmp_false.pattr + ) elif lhs in ("forelselaststmtl", "forelsestmt"): # print("XXX", first, last) @@ -424,27 +434,36 @@ class Python26Parser(Python2Parser): # since the operand can be a relative offset rather than # an absolute offset. setup_inst = self.insts[self.offset2inst_index[tokens[first].offset]] - last = min(len(tokens)-1, last) + last = min(len(tokens) - 1, last) if self.version <= (2, 2) and tokens[last] == "COME_FROM": last += 1 - return tokens[last-1].off2int() > setup_inst.argval + return tokens[last - 1].off2int() > setup_inst.argval elif rule == ("ifstmt", ("testexpr", "_ifstmts_jump")): - for i in range(last-1, last-4, -1): + for i in range(last - 1, last - 4, -1): t = tokens[i] if t == "JUMP_FORWARD": - return t.attr > tokens[min(last, len(tokens)-1)].off2int() + return t.attr > tokens[min(last, len(tokens) - 1)].off2int() elif t not in ("POP_TOP", "COME_FROM"): break pass pass elif rule == ( - 'list_for', - ('expr', 'for_iter', 'store', 'list_iter', - 'JUMP_ABSOLUTE', 'come_froms', 'POP_TOP', 'jb_pop')): + "list_for", + ( + "expr", + "for_iter", + "store", + "list_iter", + "JUMP_ABSOLUTE", + "come_froms", + "POP_TOP", + "jb_pop", + ), + ): # The JUMP_ABSOLUTE has to be to the last POP_TOP or this is invalid ja_attr = ast[4].attr return tokens[last].offset != ja_attr - elif lhs == 'try_except': + elif lhs == "try_except": # We need to distingush try_except from tryelsestmt and we do that # by checking the jump before the END_FINALLY # If we have: @@ -457,16 +476,19 @@ class Python26Parser(Python2Parser): # COME_FROM if last == len(tokens): last -= 1 - if tokens[last] != 'COME_FROM' and tokens[last-1] == 'COME_FROM': + if tokens[last] != "COME_FROM" and tokens[last - 1] == "COME_FROM": last -= 1 - if (tokens[last] == 'COME_FROM' - and tokens[last-1] == 'END_FINALLY' - and tokens[last-2] == 'POP_TOP'): + if ( + tokens[last] == "COME_FROM" + and tokens[last - 1] == "END_FINALLY" + and tokens[last - 2] == "POP_TOP" + ): # A jump of 2 is a jump around POP_TOP, END_FINALLY which # would indicate try/else rather than try - return (tokens[last-3].kind not in frozenset(('JUMP_FORWARD', 'RETURN_VALUE')) - or (tokens[last-3] == 'JUMP_FORWARD' and tokens[last-3].attr != 2)) - elif lhs == 'tryelsestmt': + return tokens[last - 3].kind not in frozenset( + ("JUMP_FORWARD", "RETURN_VALUE") + ) or (tokens[last - 3] == "JUMP_FORWARD" and tokens[last - 3].attr != 2) + elif lhs == "tryelsestmt": # We need to distingush try_except from tryelsestmt and we do that # by making sure that the jump before the except handler jumps to @@ -475,17 +497,16 @@ class Python26Parser(Python2Parser): # didn't work as it failed with a "try" embedded inside a "try/else" # since we can't detect COME_FROM boundaries. - if ast[3] == 'except_handler': + if ast[3] == "except_handler": except_handler = ast[3] - if except_handler[0] == 'JUMP_FORWARD': + if except_handler[0] == "JUMP_FORWARD": else_start = int(except_handler[0].pattr) if last == len(tokens): last -= 1 - if tokens[last] == 'COME_FROM' and isinstance: - last_offset = int(tokens[last].offset.split('_')[0]) + if tokens[last] == "COME_FROM" and isinstance: + last_offset = int(tokens[last].offset.split("_")[0]) return else_start >= last_offset - # The above test apparently isn't good enough, so we have additional # checks distinguish try_except from tryelsestmt and we do that # by checking the jump before the END_FINALLY @@ -499,41 +520,52 @@ class Python26Parser(Python2Parser): # COME_FROM if last == len(tokens): last -= 1 - while tokens[last-1] == 'COME_FROM' and tokens[last-2] == 'COME_FROM': + while tokens[last - 1] == "COME_FROM" and tokens[last - 2] == "COME_FROM": last -= 1 - if tokens[last] == 'COME_FROM' and tokens[last-1] == 'COME_FROM': + if tokens[last] == "COME_FROM" and tokens[last - 1] == "COME_FROM": last -= 1 - if (tokens[last] == 'COME_FROM' - and tokens[last-1] == 'END_FINALLY' - and tokens[last-2] == 'POP_TOP'): + if ( + tokens[last] == "COME_FROM" + and tokens[last - 1] == "END_FINALLY" + and tokens[last - 2] == "POP_TOP" + ): # A jump of 2 is a jump around POP_TOP, END_FINALLY which # would indicate try/else rather than try - return (tokens[last-3].kind in frozenset(('JUMP_FORWARD', 'RETURN_VALUE')) - and (tokens[last-3] != 'JUMP_FORWARD' or tokens[last-3].attr == 2)) - + return tokens[last - 3].kind in frozenset( + ("JUMP_FORWARD", "RETURN_VALUE") + ) and (tokens[last - 3] != "JUMP_FORWARD" or tokens[last - 3].attr == 2) return False + + class Python26ParserSingle(Python2Parser, PythonParserSingle): pass -if __name__ == '__main__': + +if __name__ == "__main__": # Check grammar p = Python26Parser() p.check_grammar() from uncompyle6 import PYTHON_VERSION, IS_PYPY + if PYTHON_VERSION == 2.6: lhs, rhs, tokens, right_recursive, dup_rhs = p.check_sets() from uncompyle6.scanner import get_scanner + s = get_scanner(PYTHON_VERSION, IS_PYPY) - opcode_set = set(s.opc.opname).union(set( - """JUMP_BACK CONTINUE RETURN_END_IF COME_FROM + opcode_set = set(s.opc.opname).union( + set( + """JUMP_BACK CONTINUE RETURN_END_IF COME_FROM LOAD_GENEXPR LOAD_ASSERT LOAD_SETCOMP LOAD_DICTCOMP LAMBDA_MARKER RETURN_LAST - """.split())) + """.split() + ) + ) remain_tokens = set(tokens) - opcode_set import re - remain_tokens = set([re.sub('_\d+$', '', t) for t in remain_tokens]) - remain_tokens = set([re.sub('_CONT$', '', t) for t in remain_tokens]) + + remain_tokens = set([re.sub("_\d+$", "", t) for t in remain_tokens]) + remain_tokens = set([re.sub("_CONT$", "", t) for t in remain_tokens]) remain_tokens = set(remain_tokens) - opcode_set print(remain_tokens) # print(sorted(p.rule2name.items())) diff --git a/uncompyle6/parsers/reducecheck/__init__.py b/uncompyle6/parsers/reducecheck/__init__.py index e0d1e8fa..c91bd332 100644 --- a/uncompyle6/parsers/reducecheck/__init__.py +++ b/uncompyle6/parsers/reducecheck/__init__.py @@ -6,6 +6,7 @@ from uncompyle6.parsers.reducecheck.ifelsestmt import * from uncompyle6.parsers.reducecheck.ifelsestmt2 import * from uncompyle6.parsers.reducecheck.iflaststmt import * from uncompyle6.parsers.reducecheck.ifstmt import * +from uncompyle6.parsers.reducecheck.ifstmt2 import * from uncompyle6.parsers.reducecheck.ifstmts_jump import * from uncompyle6.parsers.reducecheck.or_check import * from uncompyle6.parsers.reducecheck.testtrue import * diff --git a/uncompyle6/parsers/reducecheck/ifelsestmt2.py b/uncompyle6/parsers/reducecheck/ifelsestmt2.py index e47e0092..c7ef9042 100644 --- a/uncompyle6/parsers/reducecheck/ifelsestmt2.py +++ b/uncompyle6/parsers/reducecheck/ifelsestmt2.py @@ -1,4 +1,7 @@ # Copyright (c) 2020-2022 Rocky Bernstein +""" +If/else statement reduction check for Python 2.6 (and older?) +""" IFELSE_STMT_RULES = frozenset( [ diff --git a/uncompyle6/parsers/reducecheck/ifstmt2.py b/uncompyle6/parsers/reducecheck/ifstmt2.py new file mode 100644 index 00000000..a8e616b9 --- /dev/null +++ b/uncompyle6/parsers/reducecheck/ifstmt2.py @@ -0,0 +1,90 @@ +# Copyright (c) 2022 Rocky Bernstein +""" +If statement reduction check for Python 2.6 (and older?) +""" + + +def ifstmt2(self, lhs, n, rule, ast, tokens, first, last): + + # print("XXX", first, last) + # for t in range(first, last): + # print(tokens[t]) + # print("=" * 30) + + if lhs == "ifstmtl": + if last == n: + last -= 1 + pass + if tokens[last].attr and isinstance(tokens[last].attr, int): + if tokens[first].offset >= tokens[last].attr: + return True + pass + pass + + # Make sure jumps don't extend beyond the end of the if statement. + l = last + if l == n: + l -= 1 + if isinstance(tokens[l].offset, str): + last_offset = int(tokens[l].offset.split("_")[0], 10) + else: + last_offset = tokens[l].offset + for i in range(first, l): + t = tokens[i] + # instead of POP_JUMP_IF, should we use op attributes? + if t.kind in ("JUMP_IF_FALSE", "JUMP_IF_TRUE"): + jif_target = int(t.pattr) + target_instr = self.insts[self.offset2inst_index[jif_target]] + if lhs == "iflaststmtl" and target_instr.opname == "JUMP_ABSOLUTE": + jif_target = target_instr.arg + if jif_target > last_offset: + # In come cases, where we have long bytecode, a + # "POP_JUMP_IF_TRUE/FALSE" offset might be too + # large for the instruction; so instead it + # jumps to a JUMP_FORWARD. Allow that here. + if tokens[l] == "JUMP_FORWARD": + return tokens[l].attr != jif_target + return True + elif lhs == "ifstmtl" and tokens[first].off2int() > jif_target: + # A conditional JUMP to the loop is expected for "ifstmtl" + return False + pass + pass + pass + + if ast: + testexpr = ast[0] + + if (last + 1) < n and tokens[last + 1] == "COME_FROM_LOOP": + # iflastsmtl jumped outside of loop. No good. + return True + + if testexpr[0] in ("testtrue", "testfalse"): + test = testexpr[0] + if len(test) > 1 and test[1].kind.startswith("jmp_"): + jmp_target = int(test[1][0].pattr) + if last == len(tokens): + last -= 1 + if ( + tokens[first].off2int(prefer_last=True) + <= jmp_target + < tokens[last].off2int(prefer_last=False) + ): + return True + # jmp_target less than tokens[first] is okay - is to a loop + # jmp_target equal tokens[last] is also okay: normal non-optimized non-loop jump + if jmp_target > tokens[last].off2int(): + # One more weird case to look out for + # if c1: + # if c2: # Jumps around the *outer* "else" + # ... + # else: + if jmp_target == tokens[last - 1].attr: + return False + if last < n and tokens[last].kind.startswith("JUMP"): + return False + return True + + pass + pass + return False