You've already forked python-uncompyle6
mirror of
https://github.com/rocky/python-uncompyle6.git
synced 2025-08-03 00:45:53 +08:00
Towards better 3.0 decompilation
Sync scanner2 and scanner3 better
This commit is contained in:
@@ -484,20 +484,21 @@ class Scanner2(scan.Scanner):
|
||||
in python2.3+
|
||||
"""
|
||||
|
||||
# TODO: check the struct boundaries more precisely -Dan
|
||||
|
||||
code = self.code
|
||||
|
||||
# Detect parent structure
|
||||
parent = self.structs[0]
|
||||
start = parent['start']
|
||||
end = parent['end']
|
||||
|
||||
# Pick inner-most parent for our offset
|
||||
for struct in self.structs:
|
||||
_start = struct['start']
|
||||
_end = struct['end']
|
||||
if (_start <= offset < _end) and (_start >= start and _end <= end):
|
||||
start = _start
|
||||
end = _end
|
||||
current_start = struct['start']
|
||||
current_end = struct['end']
|
||||
if ((current_start <= offset < current_end)
|
||||
and (current_start >= start and current_end <= end)):
|
||||
start = current_start
|
||||
end = current_end
|
||||
parent = struct
|
||||
|
||||
if op == self.opc.SETUP_LOOP:
|
||||
|
@@ -322,6 +322,7 @@ class Scanner3(Scanner):
|
||||
if target <= inst.offset:
|
||||
next_opname = self.opname[self.code[inst.offset+3]]
|
||||
if (inst.offset in self.stmts and
|
||||
#if (hasattr(inst, 'linestart') and
|
||||
(next_opname not in ('END_FINALLY', 'POP_BLOCK',
|
||||
# Python 3.0 only uses POP_TOP
|
||||
'POP_TOP'))):
|
||||
@@ -499,7 +500,7 @@ class Scanner3(Scanner):
|
||||
prelim = self.all_instr(start, end, self.statement_opcodes)
|
||||
|
||||
# Initialize final container with statements with
|
||||
# preliminnary data
|
||||
# preliminary data
|
||||
stmts = self.stmts = set(prelim)
|
||||
|
||||
# Same for opcode sequences
|
||||
@@ -578,11 +579,11 @@ class Scanner3(Scanner):
|
||||
op = self.code[offset]
|
||||
if self.version >= 3.6:
|
||||
target = self.code[offset+1]
|
||||
if op in op3.hasjrel:
|
||||
if op in self.opc.hasjrel:
|
||||
target += offset + 2
|
||||
else:
|
||||
target = self.code[offset+1] + self.code[offset+2] * 256
|
||||
if op in op3.hasjrel:
|
||||
if op in self.opc.hasjrel:
|
||||
target += offset + 3
|
||||
|
||||
return target
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# Copyright (c) 2016 by Rocky Bernstein
|
||||
# Copyright (c) 2016, 2017 by Rocky Bernstein
|
||||
"""
|
||||
Python 3.0 bytecode scanner/deparser
|
||||
|
||||
@@ -12,6 +12,8 @@ from __future__ import print_function
|
||||
from xdis.opcodes import opcode_30 as opc
|
||||
JUMP_OPs = map(lambda op: opc.opname[op], opc.hasjrel + opc.hasjabs)
|
||||
|
||||
JUMP_TF = frozenset([opc.JUMP_IF_FALSE, opc.JUMP_IF_TRUE])
|
||||
|
||||
from uncompyle6.scanners.scanner3 import Scanner3
|
||||
class Scanner30(Scanner3):
|
||||
|
||||
@@ -20,6 +22,373 @@ class Scanner30(Scanner3):
|
||||
return
|
||||
pass
|
||||
|
||||
def detect_control_flow(self, offset, targets):
|
||||
"""
|
||||
Detect structures and their boundaries to fix optimized jumps
|
||||
Python 3.0 is more like Python 2.6 than it is Python 3.x.
|
||||
So we have a special routine here.
|
||||
"""
|
||||
|
||||
code = self.code
|
||||
op = code[offset]
|
||||
|
||||
# Detect parent structure
|
||||
parent = self.structs[0]
|
||||
start = parent['start']
|
||||
end = parent['end']
|
||||
|
||||
# Pick inner-most parent for our offset
|
||||
for struct in self.structs:
|
||||
current_start = struct['start']
|
||||
current_end = struct['end']
|
||||
if ((current_start <= offset < current_end)
|
||||
and (current_start >= start and current_end <= end)):
|
||||
start = current_start
|
||||
end = current_end
|
||||
parent = struct
|
||||
|
||||
if op == self.opc.SETUP_LOOP:
|
||||
# We categorize loop types: 'for', 'while', 'while 1' with
|
||||
# possibly suffixes '-loop' and '-else'
|
||||
# Try to find the jump_back instruction of the loop.
|
||||
# It could be a return instruction.
|
||||
|
||||
start = offset+3
|
||||
target = self.get_target(offset)
|
||||
end = self.restrict_to_parent(target, parent)
|
||||
self.setup_loop_targets[offset] = target
|
||||
self.setup_loops[target] = offset
|
||||
|
||||
if target != end:
|
||||
self.fixed_jumps[offset] = end
|
||||
|
||||
(line_no, next_line_byte) = self.lines[offset]
|
||||
jump_back = self.last_instr(start, end, self.opc.JUMP_ABSOLUTE,
|
||||
next_line_byte, False)
|
||||
|
||||
if jump_back:
|
||||
jump_forward_offset = jump_back+3
|
||||
else:
|
||||
jump_forward_offset = None
|
||||
|
||||
return_val_offset1 = self.prev[self.prev[end]]
|
||||
|
||||
if (jump_back and jump_back != self.prev_op[end]
|
||||
and self.is_jump_forward(jump_forward_offset)):
|
||||
if (code[self.prev_op[end]] == self.opc.RETURN_VALUE or
|
||||
(code[self.prev_op[end]] == self.opc.POP_BLOCK
|
||||
and code[return_val_offset1] == self.opc.RETURN_VALUE)):
|
||||
jump_back = None
|
||||
if not jump_back:
|
||||
# loop suite ends in return
|
||||
jump_back = self.last_instr(start, end, self.opc.RETURN_VALUE)
|
||||
if not jump_back:
|
||||
return
|
||||
|
||||
jump_back += 2
|
||||
if_offset = None
|
||||
if code[self.prev_op[next_line_byte]] not in JUMP_TF:
|
||||
if_offset = self.prev[next_line_byte]
|
||||
if if_offset:
|
||||
loop_type = 'while'
|
||||
self.ignore_if.add(if_offset)
|
||||
else:
|
||||
loop_type = 'for'
|
||||
target = next_line_byte
|
||||
end = jump_back + 3
|
||||
else:
|
||||
if self.get_target(jump_back) >= next_line_byte:
|
||||
jump_back = self.last_instr(start, end, self.opc.JUMP_ABSOLUTE, start, False)
|
||||
if end > jump_back+4 and self.is_jump_forward(end):
|
||||
if self.is_jump_forward(jump_back+4):
|
||||
if self.get_target(jump_back+4) == self.get_target(end):
|
||||
self.fixed_jumps[offset] = jump_back+4
|
||||
end = jump_back+4
|
||||
elif target < offset:
|
||||
self.fixed_jumps[offset] = jump_back+4
|
||||
end = jump_back+4
|
||||
|
||||
target = self.get_target(jump_back)
|
||||
|
||||
if code[target] in (self.opc.FOR_ITER, self.opc.GET_ITER):
|
||||
loop_type = 'for'
|
||||
else:
|
||||
loop_type = 'while'
|
||||
test = self.prev_op[next_line_byte]
|
||||
|
||||
if test == offset:
|
||||
loop_type = 'while 1'
|
||||
elif self.code[test] in opc.hasjabs+opc.hasjrel:
|
||||
self.ignore_if.add(test)
|
||||
test_target = self.get_target(test)
|
||||
if test_target > (jump_back+3):
|
||||
jump_back = test_target
|
||||
self.not_continue.add(jump_back)
|
||||
self.loops.append(target)
|
||||
self.structs.append({'type': loop_type + '-loop',
|
||||
'start': target,
|
||||
'end': jump_back})
|
||||
if jump_back+3 != end:
|
||||
self.structs.append({'type': loop_type + '-else',
|
||||
'start': jump_back+3,
|
||||
'end': end})
|
||||
elif op in JUMP_TF:
|
||||
start = offset + self.op_size(op)
|
||||
target = self.get_target(offset)
|
||||
rtarget = self.restrict_to_parent(target, parent)
|
||||
prev_op = self.prev_op
|
||||
|
||||
# Do not let jump to go out of parent struct bounds
|
||||
if target != rtarget and parent['type'] == 'and/or':
|
||||
self.fixed_jumps[offset] = rtarget
|
||||
return
|
||||
|
||||
# Does this jump to right after another conditional jump that is
|
||||
# not myself? If so, it's part of a larger conditional.
|
||||
# rocky: if we have a conditional jump to the next instruction, then
|
||||
# possibly I am "skipping over" a "pass" or null statement.
|
||||
|
||||
if ((code[prev_op[target]] in self.pop_jump_if_pop) and
|
||||
(target > offset) and prev_op[target] != offset):
|
||||
self.fixed_jumps[offset] = prev_op[target]
|
||||
self.structs.append({'type': 'and/or',
|
||||
'start': start,
|
||||
'end': prev_op[target]})
|
||||
return
|
||||
|
||||
# The op offset just before the target jump offset is important
|
||||
# in making a determination of what we have. Save that.
|
||||
pre_rtarget = prev_op[rtarget]
|
||||
|
||||
# Is it an "and" inside an "if" or "while" block
|
||||
if op == opc.JUMP_IF_FALSE:
|
||||
|
||||
# Search for another JUMP_IF_FALSE targetting the same op,
|
||||
# in current statement, starting from current offset, and filter
|
||||
# everything inside inner 'or' jumps and midline ifs
|
||||
match = self.rem_or(start, self.next_stmt[offset],
|
||||
opc.JUMP_IF_FALSE, target)
|
||||
|
||||
# If we still have any offsets in set, start working on it
|
||||
if match:
|
||||
is_jump_forward = self.is_jump_forward(pre_rtarget)
|
||||
if (is_jump_forward and pre_rtarget not in self.stmts and
|
||||
self.restrict_to_parent(self.get_target(pre_rtarget), parent) == rtarget):
|
||||
if (code[prev_op[pre_rtarget]] == self.opc.JUMP_ABSOLUTE
|
||||
and self.remove_mid_line_ifs([offset]) and
|
||||
target == self.get_target(prev_op[pre_rtarget]) and
|
||||
(prev_op[pre_rtarget] not in self.stmts or
|
||||
self.get_target(prev_op[pre_rtarget]) > prev_op[pre_rtarget]) and
|
||||
1 == len(self.remove_mid_line_ifs(self.rem_or(start, prev_op[pre_rtarget], JUMP_TF, target)))):
|
||||
pass
|
||||
elif (code[prev_op[pre_rtarget]] == self.opc.RETURN_VALUE
|
||||
and self.remove_mid_line_ifs([offset]) and
|
||||
1 == (len(set(self.remove_mid_line_ifs(self.rem_or(start, prev_op[pre_rtarget],
|
||||
JUMP_TF, target))) |
|
||||
set(self.remove_mid_line_ifs(self.rem_or(start, prev_op[pre_rtarget],
|
||||
(opc.JUMP_IF_FALSE,
|
||||
opc.JUMP_IF_TRUE,
|
||||
opc.JUMP_ABSOLUTE),
|
||||
pre_rtarget, True)))))):
|
||||
pass
|
||||
else:
|
||||
fix = None
|
||||
jump_ifs = self.all_instr(start, self.next_stmt[offset],
|
||||
opc.JUMP_IF_FALSE)
|
||||
last_jump_good = True
|
||||
for j in jump_ifs:
|
||||
if target == self.get_target(j):
|
||||
if self.lines[j].next == j + 3 and last_jump_good:
|
||||
fix = j
|
||||
break
|
||||
else:
|
||||
last_jump_good = False
|
||||
self.fixed_jumps[offset] = fix or match[-1]
|
||||
return
|
||||
else:
|
||||
self.fixed_jumps[offset] = match[-1]
|
||||
return
|
||||
# op == JUMP_IF_TRUE
|
||||
else:
|
||||
next = self.next_stmt[offset]
|
||||
if prev_op[next] == offset:
|
||||
pass
|
||||
elif self.is_jump_forward(next) and target == self.get_target(next):
|
||||
if code[prev_op[next]] == opc.JUMP_IF_FALSE:
|
||||
if (code[next] == self.opc.JUMP_FORWARD
|
||||
or target != rtarget
|
||||
or code[prev_op[pre_rtarget]] not in
|
||||
(self.opc.JUMP_ABSOLUTE, self.opc.RETURN_VALUE)):
|
||||
self.fixed_jumps[offset] = prev_op[next]
|
||||
return
|
||||
elif (code[next] == self.opc.JUMP_ABSOLUTE and self.is_jump_forward(target) and
|
||||
self.get_target(target) == self.get_target(next)):
|
||||
self.fixed_jumps[offset] = prev_op[next]
|
||||
return
|
||||
|
||||
# Don't add a struct for a while test, it's already taken care of
|
||||
if offset in self.ignore_if:
|
||||
return
|
||||
|
||||
if (code[pre_rtarget] == self.opc.JUMP_ABSOLUTE and
|
||||
pre_rtarget in self.stmts and
|
||||
pre_rtarget != offset and
|
||||
prev_op[pre_rtarget] != offset and
|
||||
not (code[rtarget] == self.opc.JUMP_ABSOLUTE and
|
||||
code[rtarget+3] == self.opc.POP_BLOCK and
|
||||
code[prev_op[pre_rtarget]] != self.opc.JUMP_ABSOLUTE)):
|
||||
rtarget = pre_rtarget
|
||||
|
||||
# Does the "jump if" jump beyond a jump op?
|
||||
# That is, we have something like:
|
||||
# JUMP_IF_FALSE HERE
|
||||
# ...
|
||||
# JUMP_FORWARD
|
||||
# HERE:
|
||||
#
|
||||
# If so, this can be block inside an "if" statement
|
||||
# or a conditional assignment like:
|
||||
# x = 1 if x else 2
|
||||
#
|
||||
# There are other contexts we may need to consider
|
||||
# like whether the target is "END_FINALLY"
|
||||
# or if the condition jump is to a forward location
|
||||
if self.is_jump_forward(pre_rtarget):
|
||||
if_end = self.get_target(pre_rtarget)
|
||||
|
||||
# If the jump target is back, we are looping
|
||||
if (if_end < pre_rtarget and
|
||||
(code[prev_op[if_end]] == self.opc.SETUP_LOOP)):
|
||||
if (if_end > start):
|
||||
return
|
||||
|
||||
end = self.restrict_to_parent(if_end, parent)
|
||||
|
||||
self.structs.append({'type': 'if-then',
|
||||
'start': start,
|
||||
'end': pre_rtarget})
|
||||
self.not_continue.add(pre_rtarget)
|
||||
|
||||
# if rtarget < end and (
|
||||
# code[rtarget] not in (self.opc.END_FINALLY,
|
||||
# self.opc.JUMP_ABSOLUTE) and
|
||||
# code[prev_op[pre_rtarget]] not in (self.opc.POP_EXCEPT,
|
||||
# self.opc.END_FINALLY)):
|
||||
# self.structs.append({'type': 'else',
|
||||
# 'start': rtarget,
|
||||
# 'end': end})
|
||||
# self.else_start[rtarget] = end
|
||||
elif self.is_jump_back(pre_rtarget):
|
||||
if_end = rtarget
|
||||
self.structs.append({'type': 'if-then',
|
||||
'start': start,
|
||||
'end': pre_rtarget})
|
||||
self.not_continue.add(pre_rtarget)
|
||||
elif code[pre_rtarget] in (self.opc.RETURN_VALUE,
|
||||
self.opc.BREAK_LOOP):
|
||||
self.structs.append({'type': 'if-then',
|
||||
'start': start,
|
||||
'end': rtarget})
|
||||
# It is important to distingish if this return is inside some sort
|
||||
# except block return
|
||||
jump_prev = prev_op[offset]
|
||||
if self.is_pypy and code[jump_prev] == self.opc.COMPARE_OP:
|
||||
if self.opc.cmp_op[code[jump_prev+1]] == 'exception match':
|
||||
return
|
||||
if self.version >= 3.5:
|
||||
# Python 3.5 may remove as dead code a JUMP
|
||||
# instruction after a RETURN_VALUE. So we check
|
||||
# based on seeing SETUP_EXCEPT various places.
|
||||
if code[rtarget] == self.opc.SETUP_EXCEPT:
|
||||
return
|
||||
# Check that next instruction after pops and jump is
|
||||
# not from SETUP_EXCEPT
|
||||
next_op = rtarget
|
||||
if code[next_op] == self.opc.POP_BLOCK:
|
||||
next_op += self.op_size(self.code[next_op])
|
||||
if code[next_op] == self.opc.JUMP_ABSOLUTE:
|
||||
next_op += self.op_size(self.code[next_op])
|
||||
if next_op in targets:
|
||||
for try_op in targets[next_op]:
|
||||
come_from_op = code[try_op]
|
||||
if come_from_op == self.opc.SETUP_EXCEPT:
|
||||
return
|
||||
pass
|
||||
pass
|
||||
if code[pre_rtarget] == self.opc.RETURN_VALUE:
|
||||
self.return_end_ifs.add(pre_rtarget)
|
||||
else:
|
||||
self.fixed_jumps[offset] = rtarget
|
||||
self.not_continue.add(pre_rtarget)
|
||||
|
||||
|
||||
elif op == self.opc.SETUP_EXCEPT:
|
||||
target = self.get_target(offset)
|
||||
end = self.restrict_to_parent(target, parent)
|
||||
self.fixed_jumps[offset] = end
|
||||
elif op == self.opc.SETUP_FINALLY:
|
||||
target = self.get_target(offset)
|
||||
end = self.restrict_to_parent(target, parent)
|
||||
self.fixed_jumps[offset] = end
|
||||
elif op in self.jump_if_pop:
|
||||
target = self.get_target(offset)
|
||||
if target > offset:
|
||||
unop_target = self.last_instr(offset, target, self.opc.JUMP_FORWARD, target)
|
||||
if unop_target and code[unop_target+3] != self.opc.ROT_TWO:
|
||||
self.fixed_jumps[offset] = unop_target
|
||||
else:
|
||||
self.fixed_jumps[offset] = self.restrict_to_parent(target, parent)
|
||||
pass
|
||||
pass
|
||||
elif self.version >= 3.5:
|
||||
# 3.5+ has Jump optimization which too often causes RETURN_VALUE to get
|
||||
# misclassified as RETURN_END_IF. Handle that here.
|
||||
# In RETURN_VALUE, JUMP_ABSOLUTE, RETURN_VALUE is never RETURN_END_IF
|
||||
if op == self.opc.RETURN_VALUE:
|
||||
if (offset+1 < len(code) and code[offset+1] == self.opc.JUMP_ABSOLUTE and
|
||||
offset in self.return_end_ifs):
|
||||
self.return_end_ifs.remove(offset)
|
||||
pass
|
||||
pass
|
||||
elif op == self.opc.JUMP_FORWARD:
|
||||
# If we have:
|
||||
# JUMP_FORWARD x, [non-jump, insns], RETURN_VALUE, x:
|
||||
# then RETURN_VALUE is not RETURN_END_IF
|
||||
rtarget = self.get_target(offset)
|
||||
rtarget_prev = self.prev[rtarget]
|
||||
if (code[rtarget_prev] == self.opc.RETURN_VALUE and
|
||||
rtarget_prev in self.return_end_ifs):
|
||||
i = rtarget_prev
|
||||
while i != offset:
|
||||
if code[i] in [opc.JUMP_FORWARD, opc.JUMP_ABSOLUTE]:
|
||||
return
|
||||
i = self.prev[i]
|
||||
self.return_end_ifs.remove(rtarget_prev)
|
||||
pass
|
||||
return
|
||||
|
||||
def rem_or(self, start, end, instr, target=None, include_beyond_target=False):
|
||||
"""
|
||||
Find offsets of all requested <instr> between <start> and <end>,
|
||||
optionally <target>ing specified offset, and return list found
|
||||
<instr> offsets which are not within any POP_JUMP_IF_TRUE jumps.
|
||||
"""
|
||||
assert(start>=0 and end<=len(self.code) and start <= end)
|
||||
|
||||
# Find all offsets of requested instructions
|
||||
instr_offsets = self.all_instr(start, end, instr, target, include_beyond_target)
|
||||
# Get all JUMP_IF_TRUE (or) offsets
|
||||
pjit_offsets = self.all_instr(start, end, opc.JUMP_IF_TRUE)
|
||||
filtered = []
|
||||
for pjit_offset in pjit_offsets:
|
||||
pjit_tgt = self.get_target(pjit_offset) - 3
|
||||
for instr_offset in instr_offsets:
|
||||
if instr_offset <= pjit_offset or instr_offset >= pjit_tgt:
|
||||
filtered.append(instr_offset)
|
||||
instr_offsets = filtered
|
||||
filtered = []
|
||||
return instr_offsets
|
||||
|
||||
if __name__ == "__main__":
|
||||
from uncompyle6 import PYTHON_VERSION
|
||||
if PYTHON_VERSION == 3.0:
|
||||
|
Reference in New Issue
Block a user