Merge branch 'master' into python-2.4

This commit is contained in:
rocky
2017-03-01 05:55:26 -05:00
12 changed files with 75 additions and 24 deletions

View File

@@ -112,7 +112,7 @@ with handling control flow. All of the Python decompilers I have looked
at have the same problem. In some cases we can detect an erroneous at have the same problem. In some cases we can detect an erroneous
decompilation and report that. decompilation and report that.
About 90% of the decompilation of Python standard library packages in Over 98% of the decompilation of Python standard library packages in
Python 2.7.12 verifies correctly. Over 99% of Python 2.7 and 3.3-3.5 Python 2.7.12 verifies correctly. Over 99% of Python 2.7 and 3.3-3.5
"weakly" verify. Python 2.6 drops down to 96% weakly verifying. "weakly" verify. Python 2.6 drops down to 96% weakly verifying.
Other versions drop off in quality too. Other versions drop off in quality too.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,6 @@
# From Python 2.7 parse_starttag HTMLParser.pyc
# The "if" condition is an "or" of a plain comparison and a chained
# comparison (a == b == c), continued across lines with a backslash.
# NOTE(review): presumably this file is only compiled and decompiled,
# never run -- confirm against the test harness.
attrvalue = [1,2]
while attrvalue:
if attrvalue[:1] == 5 or \
attrvalue[:1] == 2 == attrvalue[-1:]:
attrvalue = 10

View File

@@ -30,12 +30,18 @@ class PythonParser(GenericASTBuilder):
super(PythonParser, self).__init__(AST, start, debug) super(PythonParser, self).__init__(AST, start, debug)
self.collect = [ self.collect = [
'stmts', 'except_stmts', '_stmts', 'stmts', 'except_stmts', '_stmts',
'exprlist', 'kvlist', 'kwargs', 'come_froms', 'exprlist', 'kvlist', 'kwargs', 'come_froms', '_come_from',
# Python < 3 # Python < 3
'print_items', 'print_items',
# PyPy: # PyPy:
'kvlist_n'] 'kvlist_n']
def ast_first_offset(self, ast):
    """Return the offset of the leftmost node under ``ast`` that has one.

    Starting at ``ast``, keep descending into child 0 until a node with
    an ``offset`` attribute is found, then return that attribute.
    """
    node = ast
    while not hasattr(node, 'offset'):
        # No offset on this node; look at its first child instead.
        node = node[0]
    return node.offset
def add_unique_rule(self, rule, opname, count, customize): def add_unique_rule(self, rule, opname, count, customize):
"""Add rule to grammar, but only if it hasn't been added previously """Add rule to grammar, but only if it hasn't been added previously
opname and count are used in the customize() semantic the actions opname and count are used in the customize() semantic the actions

View File

@@ -157,7 +157,6 @@ class Python26Parser(Python2Parser):
# Semantic actions want the else to be at position 3 # Semantic actions want the else to be at position 3
ifelsestmt ::= testexpr c_stmts_opt jf_cf_pop else_suite come_froms ifelsestmt ::= testexpr c_stmts_opt jf_cf_pop else_suite come_froms
ifelsestmt ::= testexpr_then c_stmts_opt jf_cf_pop else_suite come_froms ifelsestmt ::= testexpr_then c_stmts_opt jf_cf_pop else_suite come_froms
ifelsestmt ::= testexpr c_stmts_opt filler else_suitel come_froms POP_TOP
ifelsestmt ::= testexpr_then c_stmts_opt filler else_suitel come_froms POP_TOP ifelsestmt ::= testexpr_then c_stmts_opt filler else_suitel come_froms POP_TOP
# Semantic actions want else_suitel to be at index 3 # Semantic actions want else_suitel to be at index 3

View File

@@ -1,4 +1,4 @@
# Copyright (c) 2016 Rocky Bernstein # Copyright (c) 2016-2017 Rocky Bernstein
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org> # Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
# Copyright (c) 2000-2002 by hartmut Goebel <hartmut@goebel.noris.de> # Copyright (c) 2000-2002 by hartmut Goebel <hartmut@goebel.noris.de>
@@ -96,6 +96,27 @@ class Python27Parser(Python2Parser):
while1stmt ::= SETUP_LOOP return_stmts COME_FROM while1stmt ::= SETUP_LOOP return_stmts COME_FROM
""" """
def add_custom_rules(self, tokens, customize):
    """Add the inherited custom rules, then mark 'and' for reduce checking.

    ``tokens`` and ``customize`` are passed unchanged to the parent
    class's add_custom_rules().
    """
    parent = super(Python27Parser, self)
    parent.add_custom_rules(tokens, customize)
    # NOTE(review): presumably this entry is what causes
    # reduce_is_invalid() to be consulted for 'and' reductions --
    # confirm against the base parser.
    self.check_reduce['and'] = 'AST'
def reduce_is_invalid(self, rule, ast, tokens, first, last):
    """Report whether reducing by ``rule`` should be treated as invalid.

    The parent class is asked first; any true result from it is returned
    as is.  Otherwise only the four-symbol 'and' rule gets an extra
    check here, and every other rule yields False.
    """
    and_rule = ('and', ('expr', 'jmp_false', 'expr', '\\e_come_from_opt'))

    inherited = super(Python27Parser, self).reduce_is_invalid(
        rule, ast, tokens, first, last)
    if inherited:
        return inherited

    if rule != and_rule:
        return False

    # Test that jmp_false jumps to the end of "and"
    # or that it jumps to the same place as the end of "and"
    jump = ast[1][0]
    # NOTE(review): the 3 is presumably the byte size of the jump
    # instruction plus its argument -- confirm for the bytecode handled.
    target = jump.offset + jump.attr + 3
    end_token = tokens[last]
    if target == end_token.offset:
        return False
    return not (end_token.pattr == jump.pattr)
class Python27ParserSingle(Python27Parser, PythonParserSingle): class Python27ParserSingle(Python27Parser, PythonParserSingle):
pass pass

View File

@@ -64,6 +64,12 @@ class Scanner(object):
# FIXME: This weird Python2 behavior is not Python3 # FIXME: This weird Python2 behavior is not Python3
self.resetTokenClass() self.resetTokenClass()
def opname_for_offset(self, offset):
    """Return the opcode name for the opcode stored at ``self.code[offset]``."""
    opcode = self.code[offset]
    return self.opc.opname[opcode]
def op_name(self, op):
    """Return the name that ``self.opc.opname`` gives for opcode ``op``."""
    names = self.opc.opname
    return names[op]
def is_jump_forward(self, offset): def is_jump_forward(self, offset):
""" """
Return True if the code at offset is some sort of jump forward. Return True if the code at offset is some sort of jump forward.

View File

@@ -170,7 +170,7 @@ class Scanner2(scan.Scanner):
# continue # continue
# last_offset = jump_offset # last_offset = jump_offset
come_from_name = 'COME_FROM' come_from_name = 'COME_FROM'
op_name = self.opc.opname[self.code[jump_offset]] op_name = self.opname_for_offset(jump_offset)
if op_name.startswith('SETUP_') and self.version == 2.7: if op_name.startswith('SETUP_') and self.version == 2.7:
come_from_type = op_name[len('SETUP_'):] come_from_type = op_name[len('SETUP_'):]
if come_from_type not in ('LOOP', 'EXCEPT'): if come_from_type not in ('LOOP', 'EXCEPT'):
@@ -184,7 +184,7 @@ class Scanner2(scan.Scanner):
pass pass
op = self.code[offset] op = self.code[offset]
op_name = self.opc.opname[op] op_name = self.op_name(op)
oparg = None; pattr = None oparg = None; pattr = None
has_arg = op_has_argument(op, self.opc) has_arg = op_has_argument(op, self.opc)
@@ -412,7 +412,7 @@ class Scanner2(scan.Scanner):
while code[j] == self.opc.JUMP_ABSOLUTE: while code[j] == self.opc.JUMP_ABSOLUTE:
j = self.prev[j] j = self.prev[j]
if (self.version >= 2.3 and if (self.version >= 2.3 and
self.opc.opname[code[j]] == 'LIST_APPEND'): # list comprehension self.opname_for_offset(j) == 'LIST_APPEND'): # list comprehension
stmts.remove(s) stmts.remove(s)
continue continue
elif code[s] == self.opc.POP_TOP: elif code[s] == self.opc.POP_TOP:
@@ -850,7 +850,7 @@ class Scanner2(scan.Scanner):
# is a jump to a SETUP_LOOP target. # is a jump to a SETUP_LOOP target.
next_offset = target + self.op_size(self.code[target]) next_offset = target + self.op_size(self.code[target])
next_op = self.code[next_offset] next_op = self.code[next_offset]
if self.opc.opname[next_op] == 'JUMP_FORWARD': if self.op_name(next_op) == 'JUMP_FORWARD':
jump_target = self.get_target(next_offset, next_op) jump_target = self.get_target(next_offset, next_op)
if jump_target in self.setup_loops: if jump_target in self.setup_loops:
self.structs.append({'type': 'while-loop', self.structs.append({'type': 'while-loop',
@@ -888,12 +888,12 @@ class Scanner2(scan.Scanner):
# 39_0 COME_FROM 3 # 39_0 COME_FROM 3
# 40 ... # 40 ...
if self.opc.opname[code[jump_if_offset]].startswith('JUMP_IF'): if self.opname_for_offset(jump_if_offset).startswith('JUMP_IF'):
jump_if_target = code[jump_if_offset+1] jump_if_target = code[jump_if_offset+1]
if self.opc.opname[code[jump_if_target + jump_if_offset + 3]] == 'POP_TOP': if self.opname_for_offset(jump_if_target + jump_if_offset + 3) == 'POP_TOP':
jump_inst = jump_if_target + jump_if_offset jump_inst = jump_if_target + jump_if_offset
jump_offset = code[jump_inst+1] jump_offset = code[jump_inst+1]
jump_op = self.opc.opname[code[jump_inst]] jump_op = self.opname_for_offset(jump_inst)
if (jump_op == 'JUMP_FORWARD' and jump_offset == 1): if (jump_op == 'JUMP_FORWARD' and jump_offset == 1):
self.structs.append({'type': 'if-then', self.structs.append({'type': 'if-then',
'start': start-3, 'start': start-3,
@@ -928,7 +928,7 @@ class Scanner2(scan.Scanner):
# 256 # 256
if if_then_maybe and jump_op == 'JUMP_ABSOLUTE': if if_then_maybe and jump_op == 'JUMP_ABSOLUTE':
jump_target = self.get_target(jump_inst, code[jump_inst]) jump_target = self.get_target(jump_inst, code[jump_inst])
if self.opc.opname[code[end]] == 'JUMP_FORWARD': if self.opname_for_offset(end) == 'JUMP_FORWARD':
end_target = self.get_target(end, code[end]) end_target = self.get_target(end, code[end])
if jump_target == end_target: if jump_target == end_target:
self.structs.append(if_then_maybe) self.structs.append(if_then_maybe)
@@ -997,7 +997,7 @@ class Scanner2(scan.Scanner):
oparg = self.get_argument(offset) oparg = self.get_argument(offset)
if label is None: if label is None:
if op in self.opc.hasjrel and self.opc.opname[op] != 'FOR_ITER': if op in self.opc.hasjrel and self.op_name(op) != 'FOR_ITER':
# if (op in self.opc.hasjrel and # if (op in self.opc.hasjrel and
# (self.version < 2.0 or op != self.opc.FOR_ITER)): # (self.version < 2.0 or op != self.opc.FOR_ITER)):
label = offset + 3 + oparg label = offset + 3 + oparg

View File

@@ -137,9 +137,6 @@ class Scanner3(Scanner):
# FIXME: remove the above in favor of: # FIXME: remove the above in favor of:
# self.varargs_ops = frozenset(self.opc.hasvargs) # self.varargs_ops = frozenset(self.opc.hasvargs)
def opName(self, offset):
return self.opc.opname[self.code[offset]]
def ingest(self, co, classname=None, code_objects={}, show_asm=None): def ingest(self, co, classname=None, code_objects={}, show_asm=None):
""" """
Pick out tokens from an uncompyle6 code object, and transform them, Pick out tokens from an uncompyle6 code object, and transform them,
@@ -225,7 +222,7 @@ class Scanner3(Scanner):
# "loop" tag last so the grammar rule matches that properly. # "loop" tag last so the grammar rule matches that properly.
for jump_offset in sorted(jump_targets[inst.offset], reverse=True): for jump_offset in sorted(jump_targets[inst.offset], reverse=True):
come_from_name = 'COME_FROM' come_from_name = 'COME_FROM'
opname = self.opName(jump_offset) opname = self.opname_for_offset(jump_offset)
if opname.startswith('SETUP_'): if opname.startswith('SETUP_'):
come_from_type = opname[len('SETUP_'):] come_from_type = opname[len('SETUP_'):]
come_from_name = 'COME_FROM_%s' % come_from_type come_from_name = 'COME_FROM_%s' % come_from_type
@@ -908,6 +905,14 @@ class Scanner3(Scanner):
target = self.get_target(offset) target = self.get_target(offset)
end = self.restrict_to_parent(target, parent) end = self.restrict_to_parent(target, parent)
self.fixed_jumps[offset] = end self.fixed_jumps[offset] = end
elif op == self.opc.POP_EXCEPT:
if self.version <= 3.5:
next_offset = offset+1
else:
next_offset = offset+2
target = self.get_target(next_offset)
if target > next_offset:
self.fixed_jumps[next_offset] = target
elif op == self.opc.SETUP_FINALLY: elif op == self.opc.SETUP_FINALLY:
target = self.get_target(offset) target = self.get_target(offset)
end = self.restrict_to_parent(target, parent) end = self.restrict_to_parent(target, parent)

View File

@@ -176,7 +176,7 @@ TABLE_DIRECT = {
'conditional_lambda': ( '(%c if %c else %c)', 2, 0, 3), 'conditional_lambda': ( '(%c if %c else %c)', 2, 0, 3),
'return_lambda': ('%c', 0), 'return_lambda': ('%c', 0),
'compare': ( '%p %[-1]{pattr} %p', (0, 19), (1, 19) ), 'compare': ( '%p %[-1]{pattr} %p', (0, 19), (1, 19) ),
'cmp_list': ( '%p %p', (0, 20), (1, 19)), 'cmp_list': ( '%p %p', (0, 29), (1, 30)),
'cmp_list1': ( '%[3]{pattr} %p %p', (0, 19), (-2, 19)), 'cmp_list1': ( '%[3]{pattr} %p %p', (0, 19), (-2, 19)),
'cmp_list2': ( '%[1]{pattr} %p', (0, 19)), 'cmp_list2': ( '%[1]{pattr} %p', (0, 19)),
# 'classdef': (), # handled by n_classdef() # 'classdef': (), # handled by n_classdef()
@@ -274,6 +274,10 @@ MAP = {
'exprlist': MAP_R0, 'exprlist': MAP_R0,
} }
# Operator precedence
# See https://docs.python.org/2/reference/expressions.html
# or https://docs.python.org/3/reference/expressions.html
# for a list.
PRECEDENCE = { PRECEDENCE = {
'build_list': 0, 'build_list': 0,
'mapexpr': 0, 'mapexpr': 0,

View File

@@ -329,18 +329,22 @@ def cmp_code_objects(version, is_pypy, code_obj1, code_obj2,
raise CmpErrorCode(name, tokens1[i1].offset, tokens1[i1], raise CmpErrorCode(name, tokens1[i1].offset, tokens1[i1],
tokens2[i2], tokens1, tokens2) tokens2[i2], tokens1, tokens2)
elif tokens1[i1].type in JUMP_OPs and tokens1[i1].pattr != tokens2[i2].pattr: elif tokens1[i1].type in JUMP_OPs and tokens1[i1].pattr != tokens2[i2].pattr:
dest1 = int(tokens1[i1].pattr)
dest2 = int(tokens2[i2].pattr)
if tokens1[i1].type == 'JUMP_BACK': if tokens1[i1].type == 'JUMP_BACK':
dest1 = int(tokens1[i1].pattr)
dest2 = int(tokens2[i2].pattr)
if offset_map[dest1] != dest2: if offset_map[dest1] != dest2:
raise CmpErrorCode(name, tokens1[i1].offset, tokens1[i1], raise CmpErrorCode(name, tokens1[i1].offset, tokens1[i1],
tokens2[i2], tokens1, tokens2) tokens2[i2], tokens1, tokens2)
else: else:
# import pdb; pdb.set_trace() # import pdb; pdb.set_trace()
if dest1 in check_jumps: try:
check_jumps[dest1].append((i1, i2, dest2)) dest1 = int(tokens1[i1].pattr)
else: if dest1 in check_jumps:
check_jumps[dest1] = [(i1, i2, dest2)] check_jumps[dest1].append((i1, i2, dest2))
else:
check_jumps[dest1] = [(i1, i2, dest2)]
except:
pass
i1 += 1 i1 += 1
i2 += 1 i2 += 1