From 21b4d52a77d1ce0ff864c0074ad0d4bccdd70552 Mon Sep 17 00:00:00 2001 From: rocky Date: Tue, 23 Aug 2022 16:50:50 -0400 Subject: [PATCH] Correct 2.6 erroneous ifelse detection --- .../looping/08_while1_if_continue.py | 6 +- uncompyle6/parsers/parse2.py | 17 +- uncompyle6/parsers/parse24.py | 4 + uncompyle6/parsers/parse26.py | 3 +- uncompyle6/parsers/reducecheck/__init__.py | 1 + uncompyle6/parsers/reducecheck/ifelsestmt2.py | 147 ++++++++++++++++++ 6 files changed, 168 insertions(+), 10 deletions(-) create mode 100644 uncompyle6/parsers/reducecheck/ifelsestmt2.py diff --git a/test/simple_source/looping/08_while1_if_continue.py b/test/simple_source/looping/08_while1_if_continue.py index de4dd758..2adc0b4c 100644 --- a/test/simple_source/looping/08_while1_if_continue.py +++ b/test/simple_source/looping/08_while1_if_continue.py @@ -19,7 +19,7 @@ def _parse(a, b, source, state): else: raise -def _parse2(source, state): +def _parse2(source, state, a, b, this): while 1: if a: if b: @@ -32,10 +32,10 @@ def _parse2(source, state): if b: break - x = 3 + x = this # Bug was in 2.3 decompilation -def _parse3(source, state): +def _parse3(source, state, a, b): while 1: if a: if b: diff --git a/uncompyle6/parsers/parse2.py b/uncompyle6/parsers/parse2.py index 3ce9c9fe..42b43c5e 100644 --- a/uncompyle6/parsers/parse2.py +++ b/uncompyle6/parsers/parse2.py @@ -27,7 +27,7 @@ that a later phase can turn into a sequence of ASCII text. 
from __future__ import print_function -from uncompyle6.parsers.reducecheck import (except_handler_else, ifelsestmt, tryelsestmt) +from uncompyle6.parsers.reducecheck import except_handler_else, ifelsestmt, tryelsestmt from uncompyle6.parser import PythonParser, PythonParserSingle, nop_func from uncompyle6.parsers.treenode import SyntaxTree from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG @@ -313,11 +313,14 @@ class Python2Parser(PythonParser): opname_base = opname[: opname.rfind("_")] if opname in ("BUILD_CONST_LIST", "BUILD_CONST_SET"): - rule = """ + rule = ( + """ add_consts ::= ADD_VALUE* const_list ::= COLLECTION_START add_consts %s expr ::= const_list - """ % opname + """ + % opname + ) self.addRule(rule, nop_func) # The order of opname listed is roughly sorted below @@ -696,7 +699,7 @@ class Python2Parser(PythonParser): # an optimization where the "and" jump_false is back to a loop. jmp_false = ast[1] if jmp_false[0] == "POP_JUMP_IF_FALSE": - while (first < last and isinstance(tokens[last].offset, str)): + while first < last and isinstance(tokens[last].offset, str): last -= 1 if jmp_false[0].attr < tokens[last].offset: return True @@ -705,8 +708,10 @@ class Python2Parser(PythonParser): # or that it jumps to the same place as the end of "and" jmp_false = ast[1][0] jmp_target = jmp_false.offset + jmp_false.attr + 3 - return not (jmp_target == tokens[last].offset or - tokens[last].pattr == jmp_false.pattr) + return not ( + jmp_target == tokens[last].offset + or tokens[last].pattr == jmp_false.pattr + ) # Dead code testing... 
# if lhs == 'while1elsestmt': # from trepan.api import debug; debug() diff --git a/uncompyle6/parsers/parse24.py b/uncompyle6/parsers/parse24.py index 11bf4dcc..05e4b5e3 100644 --- a/uncompyle6/parsers/parse24.py +++ b/uncompyle6/parsers/parse24.py @@ -97,6 +97,10 @@ class Python24Parser(Python25Parser): if self.version[:2] == (2, 4): self.check_reduce['nop_stmt'] = 'tokens' + if self.version[:2] <= (2, 4): + # TODO: We may add something different or customize something + del self.reduce_check_table["ifelsestmt"] + def reduce_is_invalid(self, rule, ast, tokens, first, last): invalid = super(Python24Parser, self).reduce_is_invalid(rule, ast, diff --git a/uncompyle6/parsers/parse26.py b/uncompyle6/parsers/parse26.py index 258d5f33..269eee14 100644 --- a/uncompyle6/parsers/parse26.py +++ b/uncompyle6/parsers/parse26.py @@ -6,7 +6,7 @@ spark grammar differences over Python2 for Python 2.6. from uncompyle6.parser import PythonParserSingle from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG from uncompyle6.parsers.parse2 import Python2Parser -from uncompyle6.parsers.reducecheck import (except_handler, tryexcept, tryelsestmt) +from uncompyle6.parsers.reducecheck import (except_handler, ifelsestmt2, tryexcept, tryelsestmt) class Python26Parser(Python2Parser): @@ -350,6 +350,7 @@ class Python26Parser(Python2Parser): super(Python26Parser, self).customize_grammar_rules(tokens, customize) self.reduce_check_table = { "except_handler": except_handler, + "ifelsestmt": ifelsestmt2, "tryelsestmt": tryelsestmt, "try_except": tryexcept, "tryelsestmtl": tryelsestmt, diff --git a/uncompyle6/parsers/reducecheck/__init__.py b/uncompyle6/parsers/reducecheck/__init__.py index a2bf8bb4..e0d1e8fa 100644 --- a/uncompyle6/parsers/reducecheck/__init__.py +++ b/uncompyle6/parsers/reducecheck/__init__.py @@ -3,6 +3,7 @@ from uncompyle6.parsers.reducecheck.aug_assign import * from uncompyle6.parsers.reducecheck.except_handler import * from 
uncompyle6.parsers.reducecheck.except_handler_else import * from uncompyle6.parsers.reducecheck.ifelsestmt import * +from uncompyle6.parsers.reducecheck.ifelsestmt2 import * from uncompyle6.parsers.reducecheck.iflaststmt import * from uncompyle6.parsers.reducecheck.ifstmt import * from uncompyle6.parsers.reducecheck.ifstmts_jump import * diff --git a/uncompyle6/parsers/reducecheck/ifelsestmt2.py b/uncompyle6/parsers/reducecheck/ifelsestmt2.py new file mode 100644 index 00000000..e47e0092 --- /dev/null +++ b/uncompyle6/parsers/reducecheck/ifelsestmt2.py @@ -0,0 +1,147 @@ +# Copyright (c) 2020-2022 Rocky Bernstein + +IFELSE_STMT_RULES = frozenset( + [ + ( + "ifelsestmt", + ( + "testexpr_then", + "pass", + "filler", + "else_suitel", + "COME_FROM", + "POP_TOP", + ), + ), + ( + "ifelsestmt", + ( + "testexpr_then", + "c_stmts_opt", + "\\e_filler", + "else_suitel", + "come_froms", + "POP_TOP", + ), + ), + ( + "ifelsestmt", + ( + "testexpr_then", + "\\e_c_stmts_opt", + "\\e_filler", + "else_suitel", + "come_froms", + "POP_TOP", + ), + ), + # We may do something like add these in the future: + ] +) + + +def ifelsestmt2(self, lhs, n, rule, tree, tokens, first, last): + + if (last + 1) < n and tokens[last + 1] == "COME_FROM_LOOP" and lhs != "ifelsestmtc": + # ifelsestmt jumped outside of loop. No good. + return True + + # print("XXX", first, last) + # for t in range(first, last): + # print(tokens[t]) + # print("=" * 30) + + if rule not in IFELSE_STMT_RULES: + # print("XXX", rule) + return False + + # Avoid if/else where the "then" is a "raise_stmt1" for an + # assert statement. Parse this as an "assert" instead. + stmts = tree[1] + if stmts in ("c_stmts",) and len(stmts) == 1: + raise_stmt1 = stmts[0] + if raise_stmt1 == "raise_stmt1" and raise_stmt1[0] in ("LOAD_ASSERT",): + return True + + # Make sure all of the "come froms" offset at the + # end of the "if" come from somewhere inside the "if". 
+ # Since the come_froms are ordered so that lowest + # offset COME_FROM is last, it is sufficient to test + # just the last one. + if len(tree) == 6 and tree[-1] == "POP_TOP": + # FIXME: There is weirdness in the grammar we need to work around. + # we need to clean up the grammar. + last_token = tree[-2] + if last_token == "COME_FROM" and tokens[first].offset > last_token.attr: + if ( + self.insts[self.offset2inst_index[last_token.attr]].opname + != "SETUP_LOOP" + ): + return True + + testexpr = tree[0] + + # Check that the condition portion of the "if" + # jumps to the "else" part. + if testexpr[0] in ("testtrue", "testfalse", "testfalse_then"): + if_condition = testexpr[0] + + else_suite = tree[3] + assert else_suite.kind.startswith("else_suite") + + if len(if_condition) > 1 and if_condition[1].kind.startswith("jmp_"): + if last == n: + last -= 1 + jmp = if_condition[1] + jmp_target = int(jmp[0].pattr) + + # Below we check that jmp_target is jumping to a feasible + # location. It should be to the transition after the "then" + # block and to the beginning of the "else" block. + # However, if the "if/else" is inside a loop, the false test can + # jump back to the loop. + + # FIXME: the below logic for jf_cfs could probably be + # simplified. + if tree[2] == "filler": + jump_else_end = tree[3] + else: + jump_else_end = tree[2] + + if jump_else_end == "jf_cfs": + jump_else_end = jump_else_end[0] + + if jump_else_end == "JUMP_FORWARD": + endif_target = int(jump_else_end.pattr) + last_offset = tokens[last].off2int() + if endif_target != last_offset: + return True + last_offset = tokens[last].off2int(prefer_last=False) + if jmp_target <= last_offset: + # jmp_target should be jumping to the end of the if/then/else + # but is it jumping to the beginning of the "else" or before + return True + if ( + jump_else_end in ("jf_cfs", "jump_forward_else") + and jump_else_end[0] == "JUMP_FORWARD" + ): + # If the "else" jump jumps before the end of the "if ..
else end", then this + # is not this kind of "ifelsestmt". + jump_else_forward = jump_else_end[0] + jump_else_forward_target = jump_else_forward.attr + if jump_else_forward_target < last_offset: + return True + pass + if ( + jump_else_end in ("jb_elsec", "jb_elsel", "jf_cfs", "jb_cfs") + and jump_else_end[-1] == "COME_FROM" + ): + if jump_else_end[-1].off2int() != jmp_target: + return True + + if tokens[first].off2int() > jmp_target: + return True + + return (jmp_target > last_offset) and tokens[last] != "JUMP_FORWARD" + + return False