Start to improve detect_structure for 2.7 and 2.x

Add debug flag to find_jump_targets to show the structure we found.
When there are control-flow bugs, they are often reflected here.

scanner3.py: make code more similar to 2.x code
This commit is contained in:
rocky
2016-11-20 02:38:59 -05:00
parent fe36c9e9f6
commit f2a3721d7d
4 changed files with 124 additions and 59 deletions

View File

@@ -8,6 +8,18 @@ def bug(state, slotstate):
for key, value in slotstate.items():
setattr(state, key, 2)
# From 2.7 disassemble
# Problem: the while loop is not detected, because
# no COME_FROM is added
# Bytecode fixture: the visible test code only reads bug_loop.__code__ and
# hands it to the scanner; it does not call this function.
# The jump-target offsets and scan.structs entries asserted for this function
# are derived from this exact body, so editing any statement here may shift them.
def bug_loop(disassemble, tb=None):
if tb:
try:
tb = 5
except AttributeError:
raise RuntimeError
while tb: tb = tb.tb_next  # single-line while loop; the test expects a 'while-loop' struct for it
disassemble(tb)
def test_if_in_for():
code = bug.__code__
scan = get_scanner(PYTHON_VERSION)
@@ -16,18 +28,35 @@ def test_if_in_for():
n = scan.setup_code(code)
scan.build_lines_data(code, n)
scan.build_prev_op(n)
fjt = scan.find_jump_targets()
fjt = scan.find_jump_targets(False)
assert {15: [3], 69: [66], 63: [18]} == fjt
assert scan.structs == \
[{'start': 0, 'end': 72, 'type': 'root'},
{'start': 18, 'end': 66, 'type': 'if-then'},
{'start': 15, 'end': 66, 'type': 'if-then'},
{'start': 31, 'end': 59, 'type': 'for-loop'},
{'start': 62, 'end': 63, 'type': 'for-else'}]
code = bug_loop.__code__
n = scan.setup_code(code)
scan.build_lines_data(code, n)
scan.build_prev_op(n)
fjt = scan.find_jump_targets(False)
assert{64: [42], 67: [42], 42: [16, 41], 19: [6]} == fjt
assert scan.structs == [
{'start': 0, 'end': 80, 'type': 'root'},
{'start': 3, 'end': 64, 'type': 'if-then'},
{'start': 6, 'end': 15, 'type': 'try'},
{'start': 19, 'end': 38, 'type': 'except'},
{'start': 45, 'end': 67, 'type': 'while-loop'},
{'start': 70, 'end': 64, 'type': 'while-else'},
# the previous bug was mistaking this while-loop for an if-then
{'start': 48, 'end': 67, 'type': 'while-loop'}]
elif 3.2 < PYTHON_VERSION <= 3.4:
scan.code = array('B', code.co_code)
scan.build_lines_data(code)
scan.build_prev_op()
fjt = scan.find_jump_targets()
fjt = scan.find_jump_targets(False)
assert {69: [66], 63: [18]} == fjt
assert scan.structs == \
[{'end': 72, 'type': 'root', 'start': 0},

View File

@@ -138,7 +138,7 @@ class Scanner2(scan.Scanner):
if names[self.get_argument(i+3)] == 'AssertionError':
self.load_asserts.add(i+3)
jump_targets = self.find_jump_targets()
jump_targets = self.find_jump_targets(show_asm)
# contains (code, [addrRefToCode])
last_stmt = self.next_stmt[0]
@@ -353,7 +353,7 @@ class Scanner2(scan.Scanner):
j+=1
return
def build_stmt_indices(self):
def build_statement_indices(self):
code = self.code
start = 0
end = len(code)
@@ -430,10 +430,10 @@ class Scanner2(scan.Scanner):
slist += [end] * (end-len(slist))
def next_except_jump(self, start):
'''
"""
Return the next jump that was generated by an except SomeException:
construct in a try...except...else clause or None if not found.
'''
"""
if self.code[start] == self.opc.DUP_TOP:
except_match = self.first_instr(start, len(self.code), self.opc.PJIF)
@@ -467,11 +467,11 @@ class Scanner2(scan.Scanner):
elif op in self.setup_ops:
count_SETUP_ += 1
def detect_structure(self, pos, op):
'''
def detect_structure(self, offset, op):
"""
Detect type of block structures and their boundaries to fix optimized jumps
in python2.3+
'''
"""
# TODO: check the struct boundaries more precisely -Dan
@@ -484,7 +484,7 @@ class Scanner2(scan.Scanner):
for struct in self.structs:
_start = struct['start']
_end = struct['end']
if (_start <= pos < _end) and (_start >= start and _end <= end):
if (_start <= offset < _end) and (_start >= start and _end <= end):
start = _start
end = _end
parent = struct
@@ -496,14 +496,16 @@ class Scanner2(scan.Scanner):
# Try to find the jump_back instruction of the loop.
# It could be a return instruction.
start = pos+3
target = self.get_target(pos, op)
start = offset+3
target = self.get_target(offset, op)
end = self.restrict_to_parent(target, parent)
self.setup_loop_targets[offset] = target
self.setup_loops[target] = offset
if target != end:
self.fixed_jumps[pos] = end
self.fixed_jumps[offset] = end
(line_no, next_line_byte) = self.lines[pos]
(line_no, next_line_byte) = self.lines[offset]
jump_back = self.last_instr(start, end, self.opc.JUMP_ABSOLUTE,
next_line_byte, False)
@@ -567,10 +569,10 @@ class Scanner2(scan.Scanner):
if end > jump_back+4 and code[end] in self.jump_forward:
if code[jump_back+4] in self.jump_forward:
if self.get_target(jump_back+4) == self.get_target(end):
self.fixed_jumps[pos] = jump_back+4
self.fixed_jumps[offset] = jump_back+4
end = jump_back+4
elif target < pos:
self.fixed_jumps[pos] = jump_back+4
elif target < offset:
self.fixed_jumps[offset] = jump_back+4
end = jump_back+4
target = self.get_target(jump_back, self.opc.JUMP_ABSOLUTE)
@@ -586,7 +588,7 @@ class Scanner2(scan.Scanner):
else:
test = self.prev[next_line_byte]
if test == pos:
if test == offset:
loop_type = 'while 1'
elif self.code[test] in self.opc.hasjabs + self.opc.hasjrel:
self.ignore_if.add(test)
@@ -603,15 +605,15 @@ class Scanner2(scan.Scanner):
'start': jump_back+3,
'end': end})
elif op == self.opc.SETUP_EXCEPT:
start = pos+3
target = self.get_target(pos, op)
start = offset+3
target = self.get_target(offset, op)
end = self.restrict_to_parent(target, parent)
if target != end:
self.fixed_jumps[pos] = end
self.fixed_jumps[offset] = end
# print target, end, parent
# Add the try block
self.structs.append({'type': 'try',
'start': start,
'start': start-3,
'end': end-4})
# Now isolate the except and else blocks
end_else = start_else = self.get_target(self.prev[end])
@@ -655,15 +657,15 @@ class Scanner2(scan.Scanner):
self.fixed_jumps[i] = i+1
elif op in self.pop_jump_if:
target = self.get_target(pos, op)
target = self.get_target(offset, op)
rtarget = self.restrict_to_parent(target, parent)
# Do not let jump to go out of parent struct bounds
if target != rtarget and parent['type'] == 'and/or':
self.fixed_jumps[pos] = rtarget
self.fixed_jumps[offset] = rtarget
return
start = pos+3
start = offset+3
pre = self.prev
# Does this jump to right after another conditional jump that is
@@ -678,8 +680,8 @@ class Scanner2(scan.Scanner):
op_testset = self.pop_jump_if_or_pop | self.pop_jump_if
if ( code[pre[target]] in op_testset
and (target > pos) ):
self.fixed_jumps[pos] = pre[target]
and (target > offset) ):
self.fixed_jumps[offset] = pre[target]
self.structs.append({'type': 'and/or',
'start': start,
'end': pre[target]})
@@ -691,7 +693,7 @@ class Scanner2(scan.Scanner):
# Search for other POP_JUMP_IF_FALSE targeting the same op,
# in current statement, starting from current offset, and filter
# everything inside inner 'or' jumps and midline ifs
match = self.rem_or(start, self.next_stmt[pos], self.opc.PJIF, target)
match = self.rem_or(start, self.next_stmt[offset], self.opc.PJIF, target)
# If we still have any offsets in set, start working on it
if match:
@@ -699,13 +701,13 @@ class Scanner2(scan.Scanner):
and pre[rtarget] not in self.stmts \
and self.restrict_to_parent(self.get_target(pre[rtarget]), parent) == rtarget:
if code[pre[pre[rtarget]]] == self.opc.JUMP_ABSOLUTE \
and self.remove_mid_line_ifs([pos]) \
and self.remove_mid_line_ifs([offset]) \
and target == self.get_target(pre[pre[rtarget]]) \
and (pre[pre[rtarget]] not in self.stmts or self.get_target(pre[pre[rtarget]]) > pre[pre[rtarget]])\
and 1 == len(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], self.pop_jump_if, target))):
pass
elif code[pre[pre[rtarget]]] == self.opc.RETURN_VALUE \
and self.remove_mid_line_ifs([pos]) \
and self.remove_mid_line_ifs([offset]) \
and 1 == (len(set(self.remove_mid_line_ifs(self.rem_or(start,
pre[pre[rtarget]],
self.pop_jump_if, target)))
@@ -714,7 +716,7 @@ class Scanner2(scan.Scanner):
pass
else:
fix = None
jump_ifs = self.all_instr(start, self.next_stmt[pos], self.opc.PJIF)
jump_ifs = self.all_instr(start, self.next_stmt[offset], self.opc.PJIF)
last_jump_good = True
for j in jump_ifs:
if target == self.get_target(j):
@@ -723,53 +725,53 @@ class Scanner2(scan.Scanner):
break
else:
last_jump_good = False
self.fixed_jumps[pos] = fix or match[-1]
self.fixed_jumps[offset] = fix or match[-1]
return
else:
if (self.version < 2.7
and parent['type'] in ('root', 'for-loop', 'if-then',
'if-else', 'try')):
self.fixed_jumps[pos] = rtarget
self.fixed_jumps[offset] = rtarget
else:
# note test for < 2.7 might be superfluous although informative
# for 2.7 a different branch is taken and the below code is handled
# under: elif op in self.pop_jump_if_or_pop
# below
self.fixed_jumps[pos] = match[-1]
self.fixed_jumps[offset] = match[-1]
return
else: # op != self.opc.PJIT
if self.version < 2.7 and code[pos+3] == self.opc.POP_TOP:
assert_pos = pos + 4
if self.version < 2.7 and code[offset+3] == self.opc.POP_TOP:
assert_offset = offset + 4
else:
assert_pos = pos + 3
if (assert_pos) in self.load_asserts:
assert_offset = offset + 3
if (assert_offset) in self.load_asserts:
if code[pre[rtarget]] == self.opc.RAISE_VARARGS:
return
self.load_asserts.remove(assert_pos)
self.load_asserts.remove(assert_offset)
next = self.next_stmt[pos]
if pre[next] == pos:
next = self.next_stmt[offset]
if pre[next] == offset:
pass
elif code[next] in self.jump_forward and target == self.get_target(next):
if code[pre[next]] == self.opc.PJIF:
if code[next] == self.opc.JUMP_FORWARD or target != rtarget or code[pre[pre[rtarget]]] not in (self.opc.JUMP_ABSOLUTE, self.opc.RETURN_VALUE):
self.fixed_jumps[pos] = pre[next]
self.fixed_jumps[offset] = pre[next]
return
elif code[next] == self.opc.JUMP_ABSOLUTE and code[target] in self.jump_forward:
next_target = self.get_target(next)
if self.get_target(target) == next_target:
self.fixed_jumps[pos] = pre[next]
self.fixed_jumps[offset] = pre[next]
return
elif code[next_target] in self.jump_forward and self.get_target(next_target) == self.get_target(target):
self.fixed_jumps[pos] = pre[next]
self.fixed_jumps[offset] = pre[next]
return
# don't add a struct for a while test, it's already taken care of
if pos in self.ignore_if:
if offset in self.ignore_if:
return
if code[pre[rtarget]] == self.opc.JUMP_ABSOLUTE and pre[rtarget] in self.stmts \
and pre[rtarget] != pos and pre[pre[rtarget]] != pos:
and pre[rtarget] != offset and pre[pre[rtarget]] != offset:
if code[rtarget] == self.opc.JUMP_ABSOLUTE and code[rtarget+3] == self.opc.POP_BLOCK:
if code[pre[pre[rtarget]]] != self.opc.JUMP_ABSOLUTE:
pass
@@ -787,14 +789,28 @@ class Scanner2(scan.Scanner):
if_end = self.get_target(pre_rtarget)
# Is this a loop and not an "if" statement?
if (if_end < pre_rtarget) and (code[pre[if_end]] == self.opc.SETUP_LOOP):
if(if_end > start):
if (if_end < pre_rtarget) and (pre[if_end] in self.setup_loop_targets):
if (if_end > start):
return
else:
# We still have the case in 2.7 that the next instruction
# is a jump to a SETUP_LOOP target.
next_offset = target + self.op_size(self.code[target])
next_op = self.code[next_offset]
if self.opc.opname[next_op] == 'JUMP_FORWARD':
jump_target = self.get_target(next_offset, next_op)
if jump_target in self.setup_loops:
self.structs.append({'type': 'while-loop',
'start': start - 3,
'end': jump_target})
self.fixed_jumps[start-3] = jump_target
return
end = self.restrict_to_parent(if_end, parent)
self.structs.append({'type': 'if-then',
'start': start,
'start': start-3,
'end': pre_rtarget})
self.not_continue.add(pre_rtarget)
@@ -811,13 +827,18 @@ class Scanner2(scan.Scanner):
self.return_end_ifs.add(pre_rtarget)
elif op in self.pop_jump_if_or_pop:
target = self.get_target(pos, op)
self.fixed_jumps[pos] = self.restrict_to_parent(target, parent)
target = self.get_target(offset, op)
self.fixed_jumps[offset] = self.restrict_to_parent(target, parent)
def find_jump_targets(self):
def find_jump_targets(self, debug):
"""
Detect all offsets in a byte code which are jump targets
where we might insert a COME_FROM instruction.
where we might insert a pseudo "COME_FROM" instruction.
"COME_FROM" instructions are used in detecting overall
control flow. The more detailed information about the
control flow is captured in self.structs.
Since this stuff is tricky, consult self.structs when
something goes amiss.
Return a dict mapping each target offset to the list of source
offsets recorded for it. An instruction can be jumped to from
multiple instructions.
@@ -833,11 +854,13 @@ class Scanner2(scan.Scanner):
# Map fixed jumps to their real destination
self.fixed_jumps = {}
self.ignore_if = set()
self.build_stmt_indices()
self.build_statement_indices()
# Containers filled by detect_structure()
self.not_continue = set()
self.return_end_ifs = set()
self.setup_loop_targets = {} # target given setup_loop offset
self.setup_loops = {} # setup_loop offset given target
targets = {}
for offset in self.op_range(0, n):
@@ -883,9 +906,10 @@ class Scanner2(scan.Scanner):
and code[offset+4] == self.opc.END_FINALLY))):
# FIXME: rocky: I think we need something like this...
if offset not in set(self.ignore_if) or self.version == 2.7:
targets[label] = targets.get(label, []) + [offset]
# targets[label] = targets.get(label, []) + [offset]
if offset not in set(self.ignore_if):
source = (self.setup_loops[label]
if label in self.setup_loops else offset)
targets[label] = targets.get(label, []) + [source]
pass
pass
@@ -893,6 +917,14 @@ class Scanner2(scan.Scanner):
elif op == self.opc.END_FINALLY and offset in self.fixed_jumps and self.version == 2.7:
label = self.fixed_jumps[offset]
targets[label] = targets.get(label, []) + [offset]
pass
pass
# DEBUG:
if debug in ('both', 'after'):
import pprint as pp
pp.pprint(self.structs)
return targets
# FIXME: combine with scanner3.py code and put into scanner.py

View File

@@ -130,7 +130,7 @@ class Scanner26(scan.Scanner2):
if names[self.get_argument(i+4)] == 'AssertionError':
self.load_asserts.add(i+4)
jump_targets = self.find_jump_targets()
jump_targets = self.find_jump_targets(False)
# contains (code, [addrRefToCode])
last_stmt = self.next_stmt[0]

View File

@@ -428,6 +428,8 @@ class Scanner3(Scanner):
# Containers filled by detect_structure()
self.not_continue = set()
self.return_end_ifs = set()
self.setup_loop_targets = {} # target given setup_loop offset
self.setup_loops = {} # setup_loop offset given target
targets = {}
for offset in self.op_range(0, n):
@@ -585,6 +587,8 @@ class Scanner3(Scanner):
start = offset+3
target = self.get_target(offset)
end = self.restrict_to_parent(target, parent)
self.setup_loop_targets[offset] = target
self.setup_loops[target] = offset
if target != end:
self.fixed_jumps[offset] = end