From 0261ea1e66e4133bdd3f737d235fc1a51d98fe15 Mon Sep 17 00:00:00 2001 From: root Date: Tue, 5 Mar 2013 16:44:14 +0100 Subject: [PATCH] Grammar improvement --- README.rst | 8 ++--- uncompyle2/parser.py | 69 +++++++++++++++++++++++------------------ uncompyle2/scanner.py | 4 +-- uncompyle2/scanner25.py | 25 ++++++++++----- uncompyle2/scanner26.py | 23 ++++++++++---- uncompyle2/scanner27.py | 63 ++++++++++++++++++++++--------------- uncompyle2/walker.py | 39 +++++++++++++++++------ 7 files changed, 147 insertions(+), 84 deletions(-) diff --git a/README.rst b/README.rst index eebfc4d9..4c362a6d 100755 --- a/README.rst +++ b/README.rst @@ -18,7 +18,7 @@ hashes get pretty-printed. a tool called 'decompyle'. This tool has been vastly improved by Hartmut Goebel `http://www.crazy-compilers.com/`_ -### Additional note (3 July 2004, Ben Burton): +# Additional note (3 July 2004, Ben Burton): This software is no longer available from the original website. It has now become a commercial decompilation service, with no software @@ -28,7 +28,7 @@ Any developers seeking to make alterations or enhancements to this code should therefore consider these debian packages an appropriate starting point. -### Additional note (5 June 2012): +# Additional note (5 June 2012): The decompilation of python bytecode 2.5 & 2.6 is based on the work of Eloi Vanderbeken. bytecode is translated to a pseudo 2.7 python bytecode @@ -86,9 +86,9 @@ http://www.python.org/doc/current/inst/inst.html Usage ----- -uncompyle2 -h prints short usage -uncompyle2 --help prints long usage +./uncompyle2.py -h prints usage +./test_pythonlib.py test files and python library Known Bugs/Restrictions ----------------------- diff --git a/uncompyle2/parser.py b/uncompyle2/parser.py index f29f4829..ed22c8a9 100755 --- a/uncompyle2/parser.py +++ b/uncompyle2/parser.py @@ -231,9 +231,7 @@ class Parser(GenericASTBuilder): importlist2 ::= importlist2 import_as importlist2 ::= import_as import_as ::= IMPORT_NAME designator - import_as ::= IMPORT_NAME LOAD_ATTR designator - import_as ::= IMPORT_NAME LOAD_ATTR LOAD_ATTR designator - import_as ::= IMPORT_NAME LOAD_ATTR LOAD_ATTR LOAD_ATTR designator + import_as ::= IMPORT_NAME load_attrs designator import_as ::= IMPORT_FROM designator importstmt ::= LOAD_CONST LOAD_CONST import_as @@ -247,10 +245,11 @@ class Parser(GenericASTBuilder): imports_cont ::= import_cont import_cont ::= LOAD_CONST LOAD_CONST import_as_cont import_as_cont ::= IMPORT_NAME_CONT designator - import_as_cont ::= IMPORT_NAME_CONT LOAD_ATTR designator - import_as_cont ::= IMPORT_NAME_CONT LOAD_ATTR LOAD_ATTR designator - import_as_cont ::= IMPORT_NAME_CONT LOAD_ATTR LOAD_ATTR LOAD_ATTR designator + import_as_cont ::= IMPORT_NAME_CONT load_attrs designator import_as_cont ::= IMPORT_FROM designator + + load_attrs ::= LOAD_ATTR + load_attrs ::= load_attrs LOAD_ATTR ''' def p_grammar(self, args): @@ -331,13 +330,13 @@ class Parser(GenericASTBuilder): call_stmt ::= expr POP_TOP stmt ::= return_stmt - return_stmt ::= expr RETURN_VALUE + return_stmt ::= ret_expr RETURN_VALUE return_stmts ::= return_stmt return_stmts ::= _stmts return_stmt return_if_stmts ::= return_if_stmt return_if_stmts ::= _stmts return_if_stmt - return_if_stmt ::= expr RETURN_END_IF + return_if_stmt ::= ret_expr RETURN_END_IF stmt ::= break_stmt break_stmt ::= BREAK_LOOP @@ -349,9 +348,15 @@ class Parser(GenericASTBuilder): continue_stmts ::= lastl_stmt continue_stmt continue_stmts ::= continue_stmt - stmt ::= raise_stmt - raise_stmt ::= exprlist RAISE_VARARGS - raise_stmt ::= nullexprlist RAISE_VARARGS + stmt ::= raise_stmt0 + stmt ::= raise_stmt1 + stmt ::= raise_stmt2 + stmt ::= raise_stmt3 + + raise_stmt0 ::= RAISE_VARARGS_0 + raise_stmt1 ::= expr RAISE_VARARGS_1 + raise_stmt2 ::= expr expr RAISE_VARARGS_2 + raise_stmt3 ::= expr expr expr RAISE_VARARGS_3 stmt ::= exec_stmt exec_stmt ::= expr exprlist DUP_TOP EXEC_STMT @@ -406,24 +411,14 @@ class Parser(GenericASTBuilder): jmp_false ::= JUMP_IF_FALSE jmp_true ::= POP_JUMP_IF_TRUE jmp_true ::= JUMP_IF_TRUE - - multi_come_from ::= multi_come_from COME_FROM - multi_come_from ::= - assert_end ::= multi_come_from POP_TOP - assert_end ::= - - assert ::= assert_expr jmp_true - LOAD_ASSERT RAISE_VARARGS assert_end - assert2 ::= assert_expr jmp_true - LOAD_ASSERT expr RAISE_VARARGS assert_end - assert ::= assert_expr jmp_true - LOAD_GLOBAL RAISE_VARARGS assert_end - assert2 ::= assert_expr jmp_true - LOAD_GLOBAL expr RAISE_VARARGS assert_end - + + assert ::= assert_expr jmp_true LOAD_ASSERT RAISE_VARARGS_1 + assert2 ::= assert_expr jmp_true LOAD_ASSERT expr CALL_FUNCTION_1 RAISE_VARARGS_1 + assert2 ::= assert_expr jmp_true LOAD_ASSERT expr RAISE_VARARGS_2 + + assert_expr ::= expr assert_expr ::= assert_expr_or assert_expr ::= assert_expr_and - assert_expr ::= expr assert_expr_or ::= assert_expr jmp_true expr assert_expr_and ::= assert_expr jmp_false expr @@ -477,7 +472,7 @@ class Parser(GenericASTBuilder): except_suite ::= c_stmts_opt JUMP_FORWARD except_suite ::= c_stmts_opt jmp_abs except_suite ::= return_stmts - + except_cond1 ::= DUP_TOP expr COMPARE_OP jmp_false POP_TOP POP_TOP POP_TOP @@ -638,12 +633,24 @@ class Parser(GenericASTBuilder): conditional ::= expr jmp_false expr JUMP_ABSOLUTE expr expr ::= conditionalnot conditionalnot ::= expr jmp_true expr _jump expr COME_FROM + + ret_expr ::= expr + ret_expr ::= ret_and + ret_expr ::= ret_or + + ret_expr_or_cond ::= ret_expr + ret_expr_or_cond ::= ret_cond + ret_expr_or_cond ::= ret_cond_not + + ret_and ::= expr jmp_false ret_expr_or_cond COME_FROM + ret_or ::= expr jmp_true ret_expr_or_cond COME_FROM + ret_cond ::= expr jmp_false expr RETURN_END_IF ret_expr_or_cond + ret_cond_not ::= expr jmp_true expr RETURN_END_IF ret_expr_or_cond stmt ::= return_lambda stmt ::= conditional_lambda - stmt ::= conditional_lambda2 - return_lambda ::= expr RETURN_VALUE LAMBDA_MARKER + return_lambda ::= ret_expr RETURN_VALUE LAMBDA_MARKER conditional_lambda ::= expr jmp_false return_if_stmt return_stmt LAMBDA_MARKER cmp ::= cmp_list @@ -744,7 +751,7 @@ def parse(tokens, customize): rule = 'unpack ::= ' + k + ' designator'*v elif op == 'UNPACK_LIST': rule = 'unpack_list ::= ' + k + ' designator'*v - elif op == 'DUP_TOPX': + elif op in ('DUP_TOPX', 'RAISE_VARARGS'): # no need to add a rule continue #rule = 'dup_topx ::= ' + 'expr '*v + k diff --git a/uncompyle2/scanner.py b/uncompyle2/scanner.py index 13dcedd8..55a20710 100755 --- a/uncompyle2/scanner.py +++ b/uncompyle2/scanner.py @@ -93,8 +93,8 @@ class Scanner(object): return target def get_argument(self, pos): - target = self.code[pos+1] + self.code[pos+2] * 256 - return target + arg = self.code[pos+1] + self.code[pos+2] * 256 + return arg def print_bytecode(self): for i in self.op_range(0, len(self.code)): diff --git a/uncompyle2/scanner25.py b/uncompyle2/scanner25.py index 7f20eed4..8f717865 100755 --- a/uncompyle2/scanner25.py +++ b/uncompyle2/scanner25.py @@ -62,7 +62,7 @@ class Scanner25(scan.Scanner): self.prev.append(i) if self.op_hasArgument(op): self.prev.append(i) - self.prev.append(i) + self.prev.append(i) j = 0 linestarts = self.linestarts self.lines = [] @@ -78,6 +78,13 @@ class Scanner25(scan.Scanner): while j < codelen: self.lines.append(linetuple(prev_line_no, codelen)) j+=1 + + self.load_asserts = set() + for i in self.op_range(0, codelen): + if self.code[i] == PJIT and self.code[i+3] == LOAD_GLOBAL: + if names[self.get_argument(i+3)] == 'AssertionError': + self.load_asserts.add(i+3) + # self.lines contains (block,addrLastInstr) cf = self.find_jump_targets(self.code) # contains (code, [addrRefToCode]) @@ -163,7 +170,7 @@ class Scanner25(scan.Scanner): UNPACK_SEQUENCE, MAKE_FUNCTION, CALL_FUNCTION, MAKE_CLOSURE, CALL_FUNCTION_VAR, CALL_FUNCTION_KW, - CALL_FUNCTION_VAR_KW, DUP_TOPX, + CALL_FUNCTION_VAR_KW, DUP_TOPX, RAISE_VARARGS ): # CE - Hack for >= 2.5 # Now all values loaded via LOAD_CLOSURE are packed into @@ -185,11 +192,8 @@ class Scanner25(scan.Scanner): op_name = 'JUMP_BACK' elif op == LOAD_GLOBAL: - try: - if pattr == 'AssertionError' and rv and rv[-1] == 'JUMP_IF_TRUE': - op_name = 'LOAD_ASSERT' - except AttributeError: - pass + if offset in self.load_asserts: + op_name = 'LOAD_ASSERT' elif op == RETURN_VALUE: if offset in self.return_end_ifs: op_name = 'RETURN_END_IF' @@ -573,6 +577,7 @@ class Scanner25(scan.Scanner): if except_match: jmp = self.prev[self.get_target(except_match)] self.ignore_if.add(except_match) + self.not_continue.add(jmp) return jmp count_END_FINALLY = 0 @@ -584,6 +589,7 @@ class Scanner25(scan.Scanner): if self.code[self.prev[i]] == NOP: i = self.prev[i] assert self.code[self.prev[i]] in (JA, JF, RETURN_VALUE) + self.not_continue.add(self.prev[i]) return self.prev[i] count_END_FINALLY += 1 elif op in (SETUP_EXCEPT, SETUP_FINALLY): @@ -784,6 +790,11 @@ class Scanner25(scan.Scanner): self.fixed_jumps[pos] = match[-1] return else: # op == PJIT + if (pos+3) in self.load_asserts: + if code[pre[rtarget]] == RAISE_VARARGS: + return + self.load_asserts.remove(pos+3) + next = self.next_stmt[pos] if pre[next] == pos: pass diff --git a/uncompyle2/scanner26.py b/uncompyle2/scanner26.py index 408d9b6d..efc1afda 100755 --- a/uncompyle2/scanner26.py +++ b/uncompyle2/scanner26.py @@ -79,6 +79,13 @@ class Scanner26(scan.Scanner): self.lines.append(linetuple(prev_line_no, codelen)) j+=1 # self.lines contains (block,addrLastInstr) + + self.load_asserts = set() + for i in self.op_range(0, codelen): + if self.code[i] == PJIT and self.code[i+3] == LOAD_GLOBAL: + if names[self.get_argument(i+3)] == 'AssertionError': + self.load_asserts.add(i+3) + cf = self.find_jump_targets(self.code) # contains (code, [addrRefToCode]) @@ -164,7 +171,7 @@ class Scanner26(scan.Scanner): UNPACK_SEQUENCE, MAKE_FUNCTION, CALL_FUNCTION, MAKE_CLOSURE, CALL_FUNCTION_VAR, CALL_FUNCTION_KW, - CALL_FUNCTION_VAR_KW, DUP_TOPX, + CALL_FUNCTION_VAR_KW, DUP_TOPX, RAISE_VARARGS ): # CE - Hack for >= 2.5 # Now all values loaded via LOAD_CLOSURE are packed into @@ -186,11 +193,8 @@ class Scanner26(scan.Scanner): op_name = 'JUMP_BACK' elif op == LOAD_GLOBAL: - try: - if pattr == 'AssertionError' and rv and rv[-1] == 'JUMP_IF_TRUE': - op_name = 'LOAD_ASSERT' - except AttributeError: - pass + if offset in self.load_asserts: + op_name = 'LOAD_ASSERT' elif op == RETURN_VALUE: if offset in self.return_end_ifs: op_name = 'RETURN_END_IF' @@ -569,6 +573,7 @@ class Scanner26(scan.Scanner): if except_match: jmp = self.prev[self.get_target(except_match)] self.ignore_if.add(except_match) + self.not_continue.add(jmp) return jmp count_END_FINALLY = 0 @@ -580,6 +585,7 @@ class Scanner26(scan.Scanner): if self.code[self.prev[i]] == NOP: i = self.prev[i] assert self.code[self.prev[i]] in (JA, JF, RETURN_VALUE) + self.not_continue.add(self.prev[i]) return self.prev[i] count_END_FINALLY += 1 elif op in (SETUP_EXCEPT, SETUP_FINALLY): @@ -783,6 +789,11 @@ class Scanner26(scan.Scanner): self.fixed_jumps[pos] = match[-1] return else: # op == PJIT + if (pos+3) in self.load_asserts: + if code[pre[rtarget]] == RAISE_VARARGS: + return + self.load_asserts.remove(pos+3) + next = self.next_stmt[pos] if pre[next] == pos: pass diff --git a/uncompyle2/scanner27.py b/uncompyle2/scanner27.py index c67968a4..cffdd54a 100755 --- a/uncompyle2/scanner27.py +++ b/uncompyle2/scanner27.py @@ -56,8 +56,6 @@ class Scanner27(scan.Scanner): self.lines.append(linetuple(prev_line_no, n)) j+=1 # self.lines contains (block,addrLastInstr) - cf = self.find_jump_targets(code) - # contains (code, [addrRefToCode]) if classname: classname = '_' + classname.lstrip('_') + '__' def unmangle(name): @@ -73,6 +71,14 @@ class Scanner27(scan.Scanner): names = co.co_names varnames = co.co_varnames + self.load_asserts = set() + for i in self.op_range(0, n): + if code[i] == PJIT and code[i+3] == LOAD_GLOBAL: + if names[self.get_argument(i+3)] == 'AssertionError': + self.load_asserts.add(i+3) + + cf = self.find_jump_targets(code) + # contains (code, [addrRefToCode]) last_stmt = self.next_stmt[0] i = self.next_stmt[last_stmt] replace = {} @@ -108,7 +114,7 @@ class Scanner27(scan.Scanner): op_name = opname[op] oparg = None; pattr = None if op >= HAVE_ARGUMENT: - oparg = code[offset+1] + code[offset+2] * 256 + extended_arg + oparg = self.get_argument(offset) + extended_arg extended_arg = 0 if op == EXTENDED_ARG: extended_arg = oparg * 65536L @@ -151,7 +157,7 @@ class Scanner27(scan.Scanner): UNPACK_SEQUENCE, MAKE_FUNCTION, CALL_FUNCTION, MAKE_CLOSURE, CALL_FUNCTION_VAR, CALL_FUNCTION_KW, - CALL_FUNCTION_VAR_KW, DUP_TOPX, + CALL_FUNCTION_VAR_KW, DUP_TOPX, RAISE_VARARGS ): # CE - Hack for >= 2.5 # Now all values loaded via LOAD_CLOSURE are packed into @@ -173,11 +179,8 @@ class Scanner27(scan.Scanner): op_name = 'JUMP_BACK' elif op == LOAD_GLOBAL: - try: - if pattr == 'AssertionError' and rv and rv[-1] == 'POP_JUMP_IF_TRUE': - op_name = 'LOAD_ASSERT' - except AttributeError: - pass + if offset in self.load_asserts: + op_name = 'LOAD_ASSERT' elif op == RETURN_VALUE: if offset in self.return_end_ifs: op_name = 'RETURN_END_IF' @@ -299,6 +302,7 @@ class Scanner27(scan.Scanner): if except_match: jmp = self.prev[self.get_target(except_match)] self.ignore_if.add(except_match) + self.not_continue.add(jmp) return jmp count_END_FINALLY = 0 @@ -308,6 +312,7 @@ class Scanner27(scan.Scanner): if op == END_FINALLY: if count_END_FINALLY == count_SETUP_: assert self.code[self.prev[i]] in (JA, JF, RETURN_VALUE) + self.not_continue.add(self.prev[i]) return self.prev[i] count_END_FINALLY += 1 elif op in (SETUP_EXCEPT, SETUP_WITH, SETUP_FINALLY): @@ -503,6 +508,11 @@ class Scanner27(scan.Scanner): self.fixed_jumps[pos] = match[-1] return else: # op == PJIT + if (pos+3) in self.load_asserts: + if code[pre[rtarget]] == RAISE_VARARGS: + return + self.load_asserts.remove(pos+3) + next = self.next_stmt[pos] if pre[next] == pos: pass @@ -511,19 +521,30 @@ class Scanner27(scan.Scanner): if code[next] == JF or target != rtarget or code[pre[pre[rtarget]]] not in (JA, RETURN_VALUE): self.fixed_jumps[pos] = pre[next] return - elif code[next] == JA and code[target] in (JA, JF) \ - and self.get_target(target) == self.get_target(next): - self.fixed_jumps[pos] = pre[next] - return + elif code[next] == JA and code[target] in (JA, JF): + next_target = self.get_target(next) + if self.get_target(target) == next_target: + self.fixed_jumps[pos] = pre[next] + return + elif code[next_target] in (JA, JF) and self.get_target(next_target) == self.get_target(target): + self.fixed_jumps[pos] = pre[next] + return #don't add a struct for a while test, it's already taken care of if pos in self.ignore_if: return if code[pre[rtarget]] == JA and pre[rtarget] in self.stmts \ - and pre[rtarget] != pos and pre[pre[rtarget]] != pos \ - and not (code[rtarget] == JA and code[rtarget+3] == POP_BLOCK and code[pre[pre[rtarget]]] != JA): - rtarget = pre[rtarget] + and pre[rtarget] != pos and pre[pre[rtarget]] != pos: + if code[rtarget] == JA and code[rtarget+3] == POP_BLOCK: + if code[pre[pre[rtarget]]] != JA: + pass + elif self.get_target(pre[pre[rtarget]]) != target: + pass + else: + rtarget = pre[rtarget] + else: + rtarget = pre[rtarget] #does the if jump just beyond a jump op, then this is probably an if statement if code[pre[rtarget]] in (JA, JF): if_end = self.get_target(pre[rtarget]) @@ -552,15 +573,7 @@ class Scanner27(scan.Scanner): elif op in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP): target = self.get_target(pos, op) - if target > pos: - unop_target = self.last_instr(pos, target, JF, target) - if unop_target and code[unop_target+3] != ROT_TWO: - self.fixed_jumps[pos] = unop_target - else: - self.fixed_jumps[pos] = self.restrict_to_parent(target, parent) - - - + self.fixed_jumps[pos] = self.restrict_to_parent(target, parent) def find_jump_targets(self, code): ''' diff --git a/uncompyle2/walker.py b/uncompyle2/walker.py index 1c42fa32..e21d21a4 100755 --- a/uncompyle2/walker.py +++ b/uncompyle2/walker.py @@ -54,7 +54,7 @@ minint = -sys.maxint-1 # the end of functions). RETURN_LOCALS = AST('return_stmt', - [ AST('expr', [ Token('LOAD_LOCALS') ]), + [ AST('ret_expr', [AST('expr', [ Token('LOAD_LOCALS') ])]), Token('RETURN_VALUE')]) @@ -199,13 +199,15 @@ TABLE_DIRECT = { # 'dup_topx': ( '%c', 0), 'designList': ( '%c = %c', 0, -1 ), 'and': ( '%c and %c', 0, 2 ), + 'ret_and': ( '%c and %c', 0, 2 ), 'and2': ( '%c', 3 ), 'or': ( '%c or %c', 0, 2 ), + 'ret_or': ( '%c or %c', 0, 2 ), 'conditional': ( '%p if %p else %p', (2,27), (0,27), (4,27)), - 'conditionaland': ( '%p if %p and %p else %p', (4,27), (0,24), (2,24), (6,27)), + 'ret_cond': ( '%p if %p else %p', (2,27), (0,27), (4,27)), 'conditionalnot': ( '%p if not %p else %p', (2,27), (0,22), (4,27)), + 'ret_cond_not': ( '%p if not %p else %p', (2,27), (0,22), (4,27)), 'conditional_lambda': ( '(%c if %c else %c)', 2, 0, 3), - 'conditional_lambda2': ( '(%c if %p and %p else %c)', 4, (0,24), (2,24), 5), 'return_lambda': ('%c', 0), 'compare': ( '%p %[-1]{pattr} %p', (0,19), (1,19) ), 'cmp_list': ( '%p %p', (0,20), (1,19)), @@ -237,8 +239,11 @@ TABLE_DIRECT = { 'call_stmt': ( '%|%p\n', (0,200)), 'break_stmt': ( '%|break\n', ), 'continue_stmt': ( '%|continue\n', ), - 'jcontinue_stmt': ( '%|continue\n', ), - 'raise_stmt': ( '%|raise %[0]C\n', (0,sys.maxint,', ') ), + + 'raise_stmt0': ( '%|raise\n', ), + 'raise_stmt1': ( '%|raise %c\n', 0), + 'raise_stmt2': ( '%|raise %c, %c\n', 0, 1), + 'raise_stmt3': ( '%|raise %c, %c, %c\n', 0, 1, 2), # 'yield': ( 'yield %c', 0), # 'return_stmt': ( '%|return %c\n', 0), @@ -365,12 +370,15 @@ PRECEDENCE = { 'unary_not': 22, 'and': 24, + 'ret_and': 24, 'or': 26, + 'ret_or': 26, 'conditional': 28, - 'conditionaland': 28, 'conditionalnot': 28, + 'ret_cond': 28, + 'ret_cond_not': 28, '_mklambda': 30, 'yield': 101 @@ -593,7 +601,7 @@ class Walker(GenericASTTraversal, object): self.prune() else: self.write(self.indent, 'return') - if self.return_none or node != AST('return_stmt', [NONE, Token('RETURN_VALUE')]): + if self.return_none or node != AST('return_stmt', [AST('ret_expr', [NONE]), Token('RETURN_VALUE')]): self.write(' ') self.preorder(node[0]) self.print_() @@ -605,7 +613,7 @@ class Walker(GenericASTTraversal, object): self.prune() else: self.write(self.indent, 'return') - if self.return_none or node != AST('return_if_stmt', [NONE, Token('RETURN_END_IF')]): + if self.return_none or node != AST('return_stmt', [AST('ret_expr', [NONE]), Token('RETURN_END_IF')]): self.write(' ') self.preorder(node[0]) self.print_() @@ -667,7 +675,15 @@ class Walker(GenericASTTraversal, object): self.preorder(node[0]) self.prec = p self.prune() - + + def n_ret_expr(self, node): + if len(node) == 1 and node[0] == 'expr': + self.n_expr(node[0]) + else: + self.n_expr(node) + + n_ret_expr_or_cond = n_expr + def n_binary_expr(self, node): self.preorder(node[0]) self.write(' ') @@ -1073,6 +1089,11 @@ class Walker(GenericASTTraversal, object): n[0].type = 'unpack_w_parens' self.default(node) + def n_except_cond2(self, node): + if node[5][0] == 'unpack': + node[5][0].type = 'unpack_w_parens' + self.default(node) + def engine(self, entry, startnode): #self.print_("-----") #self.print_(str(startnode.__dict__))