Start to improve detect_structure for 2.7 and 2.x

Add a debug flag to find_jump_targets to show the structures we found.
When there are control-flow bugs, they are often reflected there.

scanner3.py: make the code more similar to the 2.x code
This commit is contained in:
rocky
2016-11-20 02:38:59 -05:00
parent fe36c9e9f6
commit f2a3721d7d
4 changed files with 124 additions and 59 deletions

View File

@@ -8,6 +8,18 @@ def bug(state, slotstate):
for key, value in slotstate.items(): for key, value in slotstate.items():
setattr(state, key, 2) setattr(state, key, 2)
# From 2.7 disassemble.
# The problem was that the while loop was not detected, because
# a COME_FROM was not added.
def bug_loop(disassemble, tb=None):
if tb:
try:
tb = 5
except AttributeError:
raise RuntimeError
while tb: tb = tb.tb_next
disassemble(tb)
def test_if_in_for(): def test_if_in_for():
code = bug.__code__ code = bug.__code__
scan = get_scanner(PYTHON_VERSION) scan = get_scanner(PYTHON_VERSION)
@@ -16,18 +28,35 @@ def test_if_in_for():
n = scan.setup_code(code) n = scan.setup_code(code)
scan.build_lines_data(code, n) scan.build_lines_data(code, n)
scan.build_prev_op(n) scan.build_prev_op(n)
fjt = scan.find_jump_targets() fjt = scan.find_jump_targets(False)
assert {15: [3], 69: [66], 63: [18]} == fjt assert {15: [3], 69: [66], 63: [18]} == fjt
assert scan.structs == \ assert scan.structs == \
[{'start': 0, 'end': 72, 'type': 'root'}, [{'start': 0, 'end': 72, 'type': 'root'},
{'start': 18, 'end': 66, 'type': 'if-then'}, {'start': 15, 'end': 66, 'type': 'if-then'},
{'start': 31, 'end': 59, 'type': 'for-loop'}, {'start': 31, 'end': 59, 'type': 'for-loop'},
{'start': 62, 'end': 63, 'type': 'for-else'}] {'start': 62, 'end': 63, 'type': 'for-else'}]
code = bug_loop.__code__
n = scan.setup_code(code)
scan.build_lines_data(code, n)
scan.build_prev_op(n)
fjt = scan.find_jump_targets(False)
assert{64: [42], 67: [42], 42: [16, 41], 19: [6]} == fjt
assert scan.structs == [
{'start': 0, 'end': 80, 'type': 'root'},
{'start': 3, 'end': 64, 'type': 'if-then'},
{'start': 6, 'end': 15, 'type': 'try'},
{'start': 19, 'end': 38, 'type': 'except'},
{'start': 45, 'end': 67, 'type': 'while-loop'},
{'start': 70, 'end': 64, 'type': 'while-else'},
# previous bug was not mistaking while-loop for if-then
{'start': 48, 'end': 67, 'type': 'while-loop'}]
elif 3.2 < PYTHON_VERSION <= 3.4: elif 3.2 < PYTHON_VERSION <= 3.4:
scan.code = array('B', code.co_code) scan.code = array('B', code.co_code)
scan.build_lines_data(code) scan.build_lines_data(code)
scan.build_prev_op() scan.build_prev_op()
fjt = scan.find_jump_targets() fjt = scan.find_jump_targets(False)
assert {69: [66], 63: [18]} == fjt assert {69: [66], 63: [18]} == fjt
assert scan.structs == \ assert scan.structs == \
[{'end': 72, 'type': 'root', 'start': 0}, [{'end': 72, 'type': 'root', 'start': 0},

View File

@@ -138,7 +138,7 @@ class Scanner2(scan.Scanner):
if names[self.get_argument(i+3)] == 'AssertionError': if names[self.get_argument(i+3)] == 'AssertionError':
self.load_asserts.add(i+3) self.load_asserts.add(i+3)
jump_targets = self.find_jump_targets() jump_targets = self.find_jump_targets(show_asm)
# contains (code, [addrRefToCode]) # contains (code, [addrRefToCode])
last_stmt = self.next_stmt[0] last_stmt = self.next_stmt[0]
@@ -353,7 +353,7 @@ class Scanner2(scan.Scanner):
j+=1 j+=1
return return
def build_stmt_indices(self): def build_statement_indices(self):
code = self.code code = self.code
start = 0 start = 0
end = len(code) end = len(code)
@@ -430,10 +430,10 @@ class Scanner2(scan.Scanner):
slist += [end] * (end-len(slist)) slist += [end] * (end-len(slist))
def next_except_jump(self, start): def next_except_jump(self, start):
''' """
Return the next jump that was generated by an except SomeException: Return the next jump that was generated by an except SomeException:
construct in a try...except...else clause or None if not found. construct in a try...except...else clause or None if not found.
''' """
if self.code[start] == self.opc.DUP_TOP: if self.code[start] == self.opc.DUP_TOP:
except_match = self.first_instr(start, len(self.code), self.opc.PJIF) except_match = self.first_instr(start, len(self.code), self.opc.PJIF)
@@ -467,11 +467,11 @@ class Scanner2(scan.Scanner):
elif op in self.setup_ops: elif op in self.setup_ops:
count_SETUP_ += 1 count_SETUP_ += 1
def detect_structure(self, pos, op): def detect_structure(self, offset, op):
''' """
Detect type of block structures and their boundaries to fix optimized jumps Detect type of block structures and their boundaries to fix optimized jumps
in python2.3+ in python2.3+
''' """
# TODO: check the struct boundaries more precisely -Dan # TODO: check the struct boundaries more precisely -Dan
@@ -484,7 +484,7 @@ class Scanner2(scan.Scanner):
for struct in self.structs: for struct in self.structs:
_start = struct['start'] _start = struct['start']
_end = struct['end'] _end = struct['end']
if (_start <= pos < _end) and (_start >= start and _end <= end): if (_start <= offset < _end) and (_start >= start and _end <= end):
start = _start start = _start
end = _end end = _end
parent = struct parent = struct
@@ -496,14 +496,16 @@ class Scanner2(scan.Scanner):
# Try to find the jump_back instruction of the loop. # Try to find the jump_back instruction of the loop.
# It could be a return instruction. # It could be a return instruction.
start = pos+3 start = offset+3
target = self.get_target(pos, op) target = self.get_target(offset, op)
end = self.restrict_to_parent(target, parent) end = self.restrict_to_parent(target, parent)
self.setup_loop_targets[offset] = target
self.setup_loops[target] = offset
if target != end: if target != end:
self.fixed_jumps[pos] = end self.fixed_jumps[offset] = end
(line_no, next_line_byte) = self.lines[pos] (line_no, next_line_byte) = self.lines[offset]
jump_back = self.last_instr(start, end, self.opc.JUMP_ABSOLUTE, jump_back = self.last_instr(start, end, self.opc.JUMP_ABSOLUTE,
next_line_byte, False) next_line_byte, False)
@@ -567,10 +569,10 @@ class Scanner2(scan.Scanner):
if end > jump_back+4 and code[end] in self.jump_forward: if end > jump_back+4 and code[end] in self.jump_forward:
if code[jump_back+4] in self.jump_forward: if code[jump_back+4] in self.jump_forward:
if self.get_target(jump_back+4) == self.get_target(end): if self.get_target(jump_back+4) == self.get_target(end):
self.fixed_jumps[pos] = jump_back+4 self.fixed_jumps[offset] = jump_back+4
end = jump_back+4 end = jump_back+4
elif target < pos: elif target < offset:
self.fixed_jumps[pos] = jump_back+4 self.fixed_jumps[offset] = jump_back+4
end = jump_back+4 end = jump_back+4
target = self.get_target(jump_back, self.opc.JUMP_ABSOLUTE) target = self.get_target(jump_back, self.opc.JUMP_ABSOLUTE)
@@ -586,7 +588,7 @@ class Scanner2(scan.Scanner):
else: else:
test = self.prev[next_line_byte] test = self.prev[next_line_byte]
if test == pos: if test == offset:
loop_type = 'while 1' loop_type = 'while 1'
elif self.code[test] in self.opc.hasjabs + self.opc.hasjrel: elif self.code[test] in self.opc.hasjabs + self.opc.hasjrel:
self.ignore_if.add(test) self.ignore_if.add(test)
@@ -603,15 +605,15 @@ class Scanner2(scan.Scanner):
'start': jump_back+3, 'start': jump_back+3,
'end': end}) 'end': end})
elif op == self.opc.SETUP_EXCEPT: elif op == self.opc.SETUP_EXCEPT:
start = pos+3 start = offset+3
target = self.get_target(pos, op) target = self.get_target(offset, op)
end = self.restrict_to_parent(target, parent) end = self.restrict_to_parent(target, parent)
if target != end: if target != end:
self.fixed_jumps[pos] = end self.fixed_jumps[offset] = end
# print target, end, parent # print target, end, parent
# Add the try block # Add the try block
self.structs.append({'type': 'try', self.structs.append({'type': 'try',
'start': start, 'start': start-3,
'end': end-4}) 'end': end-4})
# Now isolate the except and else blocks # Now isolate the except and else blocks
end_else = start_else = self.get_target(self.prev[end]) end_else = start_else = self.get_target(self.prev[end])
@@ -655,15 +657,15 @@ class Scanner2(scan.Scanner):
self.fixed_jumps[i] = i+1 self.fixed_jumps[i] = i+1
elif op in self.pop_jump_if: elif op in self.pop_jump_if:
target = self.get_target(pos, op) target = self.get_target(offset, op)
rtarget = self.restrict_to_parent(target, parent) rtarget = self.restrict_to_parent(target, parent)
# Do not let jump to go out of parent struct bounds # Do not let jump to go out of parent struct bounds
if target != rtarget and parent['type'] == 'and/or': if target != rtarget and parent['type'] == 'and/or':
self.fixed_jumps[pos] = rtarget self.fixed_jumps[offset] = rtarget
return return
start = pos+3 start = offset+3
pre = self.prev pre = self.prev
# Does this jump to right after another conditional jump that is # Does this jump to right after another conditional jump that is
@@ -678,8 +680,8 @@ class Scanner2(scan.Scanner):
op_testset = self.pop_jump_if_or_pop | self.pop_jump_if op_testset = self.pop_jump_if_or_pop | self.pop_jump_if
if ( code[pre[target]] in op_testset if ( code[pre[target]] in op_testset
and (target > pos) ): and (target > offset) ):
self.fixed_jumps[pos] = pre[target] self.fixed_jumps[offset] = pre[target]
self.structs.append({'type': 'and/or', self.structs.append({'type': 'and/or',
'start': start, 'start': start,
'end': pre[target]}) 'end': pre[target]})
@@ -691,7 +693,7 @@ class Scanner2(scan.Scanner):
# Search for other POP_JUMP_IF_FALSE targetting the same op, # Search for other POP_JUMP_IF_FALSE targetting the same op,
# in current statement, starting from current offset, and filter # in current statement, starting from current offset, and filter
# everything inside inner 'or' jumps and midline ifs # everything inside inner 'or' jumps and midline ifs
match = self.rem_or(start, self.next_stmt[pos], self.opc.PJIF, target) match = self.rem_or(start, self.next_stmt[offset], self.opc.PJIF, target)
# If we still have any offsets in set, start working on it # If we still have any offsets in set, start working on it
if match: if match:
@@ -699,13 +701,13 @@ class Scanner2(scan.Scanner):
and pre[rtarget] not in self.stmts \ and pre[rtarget] not in self.stmts \
and self.restrict_to_parent(self.get_target(pre[rtarget]), parent) == rtarget: and self.restrict_to_parent(self.get_target(pre[rtarget]), parent) == rtarget:
if code[pre[pre[rtarget]]] == self.opc.JUMP_ABSOLUTE \ if code[pre[pre[rtarget]]] == self.opc.JUMP_ABSOLUTE \
and self.remove_mid_line_ifs([pos]) \ and self.remove_mid_line_ifs([offset]) \
and target == self.get_target(pre[pre[rtarget]]) \ and target == self.get_target(pre[pre[rtarget]]) \
and (pre[pre[rtarget]] not in self.stmts or self.get_target(pre[pre[rtarget]]) > pre[pre[rtarget]])\ and (pre[pre[rtarget]] not in self.stmts or self.get_target(pre[pre[rtarget]]) > pre[pre[rtarget]])\
and 1 == len(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], self.pop_jump_if, target))): and 1 == len(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], self.pop_jump_if, target))):
pass pass
elif code[pre[pre[rtarget]]] == self.opc.RETURN_VALUE \ elif code[pre[pre[rtarget]]] == self.opc.RETURN_VALUE \
and self.remove_mid_line_ifs([pos]) \ and self.remove_mid_line_ifs([offset]) \
and 1 == (len(set(self.remove_mid_line_ifs(self.rem_or(start, and 1 == (len(set(self.remove_mid_line_ifs(self.rem_or(start,
pre[pre[rtarget]], pre[pre[rtarget]],
self.pop_jump_if, target))) self.pop_jump_if, target)))
@@ -714,7 +716,7 @@ class Scanner2(scan.Scanner):
pass pass
else: else:
fix = None fix = None
jump_ifs = self.all_instr(start, self.next_stmt[pos], self.opc.PJIF) jump_ifs = self.all_instr(start, self.next_stmt[offset], self.opc.PJIF)
last_jump_good = True last_jump_good = True
for j in jump_ifs: for j in jump_ifs:
if target == self.get_target(j): if target == self.get_target(j):
@@ -723,53 +725,53 @@ class Scanner2(scan.Scanner):
break break
else: else:
last_jump_good = False last_jump_good = False
self.fixed_jumps[pos] = fix or match[-1] self.fixed_jumps[offset] = fix or match[-1]
return return
else: else:
if (self.version < 2.7 if (self.version < 2.7
and parent['type'] in ('root', 'for-loop', 'if-then', and parent['type'] in ('root', 'for-loop', 'if-then',
'if-else', 'try')): 'if-else', 'try')):
self.fixed_jumps[pos] = rtarget self.fixed_jumps[offset] = rtarget
else: else:
# note test for < 2.7 might be superflous although informative # note test for < 2.7 might be superflous although informative
# for 2.7 a different branch is taken and the below code is handled # for 2.7 a different branch is taken and the below code is handled
# under: elif op in self.pop_jump_if_or_pop # under: elif op in self.pop_jump_if_or_pop
# below # below
self.fixed_jumps[pos] = match[-1] self.fixed_jumps[offset] = match[-1]
return return
else: # op != self.opc.PJIT else: # op != self.opc.PJIT
if self.version < 2.7 and code[pos+3] == self.opc.POP_TOP: if self.version < 2.7 and code[offset+3] == self.opc.POP_TOP:
assert_pos = pos + 4 assert_offset = offset + 4
else: else:
assert_pos = pos + 3 assert_offset = offset + 3
if (assert_pos) in self.load_asserts: if (assert_offset) in self.load_asserts:
if code[pre[rtarget]] == self.opc.RAISE_VARARGS: if code[pre[rtarget]] == self.opc.RAISE_VARARGS:
return return
self.load_asserts.remove(assert_pos) self.load_asserts.remove(assert_offset)
next = self.next_stmt[pos] next = self.next_stmt[offset]
if pre[next] == pos: if pre[next] == offset:
pass pass
elif code[next] in self.jump_forward and target == self.get_target(next): elif code[next] in self.jump_forward and target == self.get_target(next):
if code[pre[next]] == self.opc.PJIF: if code[pre[next]] == self.opc.PJIF:
if code[next] == self.opc.JUMP_FORWARD or target != rtarget or code[pre[pre[rtarget]]] not in (self.opc.JUMP_ABSOLUTE, self.opc.RETURN_VALUE): if code[next] == self.opc.JUMP_FORWARD or target != rtarget or code[pre[pre[rtarget]]] not in (self.opc.JUMP_ABSOLUTE, self.opc.RETURN_VALUE):
self.fixed_jumps[pos] = pre[next] self.fixed_jumps[offset] = pre[next]
return return
elif code[next] == self.opc.JUMP_ABSOLUTE and code[target] in self.jump_forward: elif code[next] == self.opc.JUMP_ABSOLUTE and code[target] in self.jump_forward:
next_target = self.get_target(next) next_target = self.get_target(next)
if self.get_target(target) == next_target: if self.get_target(target) == next_target:
self.fixed_jumps[pos] = pre[next] self.fixed_jumps[offset] = pre[next]
return return
elif code[next_target] in self.jump_forward and self.get_target(next_target) == self.get_target(target): elif code[next_target] in self.jump_forward and self.get_target(next_target) == self.get_target(target):
self.fixed_jumps[pos] = pre[next] self.fixed_jumps[offset] = pre[next]
return return
# don't add a struct for a while test, it's already taken care of # don't add a struct for a while test, it's already taken care of
if pos in self.ignore_if: if offset in self.ignore_if:
return return
if code[pre[rtarget]] == self.opc.JUMP_ABSOLUTE and pre[rtarget] in self.stmts \ if code[pre[rtarget]] == self.opc.JUMP_ABSOLUTE and pre[rtarget] in self.stmts \
and pre[rtarget] != pos and pre[pre[rtarget]] != pos: and pre[rtarget] != offset and pre[pre[rtarget]] != offset:
if code[rtarget] == self.opc.JUMP_ABSOLUTE and code[rtarget+3] == self.opc.POP_BLOCK: if code[rtarget] == self.opc.JUMP_ABSOLUTE and code[rtarget+3] == self.opc.POP_BLOCK:
if code[pre[pre[rtarget]]] != self.opc.JUMP_ABSOLUTE: if code[pre[pre[rtarget]]] != self.opc.JUMP_ABSOLUTE:
pass pass
@@ -787,14 +789,28 @@ class Scanner2(scan.Scanner):
if_end = self.get_target(pre_rtarget) if_end = self.get_target(pre_rtarget)
# Is this a loop and not an "if" statment? # Is this a loop and not an "if" statment?
if (if_end < pre_rtarget) and (code[pre[if_end]] == self.opc.SETUP_LOOP): if (if_end < pre_rtarget) and (pre[if_end] in self.setup_loop_targets):
if(if_end > start):
if (if_end > start):
return return
else:
# We still have the case in 2.7 that the next instruction
# is a jump to a SETUP_LOOP target.
next_offset = target + self.op_size(self.code[target])
next_op = self.code[next_offset]
if self.opc.opname[next_op] == 'JUMP_FORWARD':
jump_target = self.get_target(next_offset, next_op)
if jump_target in self.setup_loops:
self.structs.append({'type': 'while-loop',
'start': start - 3,
'end': jump_target})
self.fixed_jumps[start-3] = jump_target
return
end = self.restrict_to_parent(if_end, parent) end = self.restrict_to_parent(if_end, parent)
self.structs.append({'type': 'if-then', self.structs.append({'type': 'if-then',
'start': start, 'start': start-3,
'end': pre_rtarget}) 'end': pre_rtarget})
self.not_continue.add(pre_rtarget) self.not_continue.add(pre_rtarget)
@@ -811,13 +827,18 @@ class Scanner2(scan.Scanner):
self.return_end_ifs.add(pre_rtarget) self.return_end_ifs.add(pre_rtarget)
elif op in self.pop_jump_if_or_pop: elif op in self.pop_jump_if_or_pop:
target = self.get_target(pos, op) target = self.get_target(offset, op)
self.fixed_jumps[pos] = self.restrict_to_parent(target, parent) self.fixed_jumps[offset] = self.restrict_to_parent(target, parent)
def find_jump_targets(self): def find_jump_targets(self, debug):
""" """
Detect all offsets in a byte code which are jump targets Detect all offsets in a byte code which are jump targets
where we might insert a COME_FROM instruction. where we might insert a pseudo "COME_FROM" instruction.
"COME_FROM" instructions are used in detecting overall
control flow. The more detailed information about the
control flow is captured in self.structs.
Since this stuff is tricky, consult self.structs when
something goes amiss.
Return the list of offsets. An instruction can be jumped Return the list of offsets. An instruction can be jumped
to in from multiple instructions. to in from multiple instructions.
@@ -833,11 +854,13 @@ class Scanner2(scan.Scanner):
# Map fixed jumps to their real destination # Map fixed jumps to their real destination
self.fixed_jumps = {} self.fixed_jumps = {}
self.ignore_if = set() self.ignore_if = set()
self.build_stmt_indices() self.build_statement_indices()
# Containers filled by detect_structure() # Containers filled by detect_structure()
self.not_continue = set() self.not_continue = set()
self.return_end_ifs = set() self.return_end_ifs = set()
self.setup_loop_targets = {} # target given setup_loop offset
self.setup_loops = {} # setup_loop offset given target
targets = {} targets = {}
for offset in self.op_range(0, n): for offset in self.op_range(0, n):
@@ -883,9 +906,10 @@ class Scanner2(scan.Scanner):
and code[offset+4] == self.opc.END_FINALLY))): and code[offset+4] == self.opc.END_FINALLY))):
# FIXME: rocky: I think we need something like this... # FIXME: rocky: I think we need something like this...
if offset not in set(self.ignore_if) or self.version == 2.7: if offset not in set(self.ignore_if):
targets[label] = targets.get(label, []) + [offset] source = (self.setup_loops[label]
# targets[label] = targets.get(label, []) + [offset] if label in self.setup_loops else offset)
targets[label] = targets.get(label, []) + [source]
pass pass
pass pass
@@ -893,6 +917,14 @@ class Scanner2(scan.Scanner):
elif op == self.opc.END_FINALLY and offset in self.fixed_jumps and self.version == 2.7: elif op == self.opc.END_FINALLY and offset in self.fixed_jumps and self.version == 2.7:
label = self.fixed_jumps[offset] label = self.fixed_jumps[offset]
targets[label] = targets.get(label, []) + [offset] targets[label] = targets.get(label, []) + [offset]
pass
pass
# DEBUG:
if debug in ('both', 'after'):
import pprint as pp
pp.pprint(self.structs)
return targets return targets
# FIXME: combine with scanner3.py code and put into scanner.py # FIXME: combine with scanner3.py code and put into scanner.py

View File

@@ -130,7 +130,7 @@ class Scanner26(scan.Scanner2):
if names[self.get_argument(i+4)] == 'AssertionError': if names[self.get_argument(i+4)] == 'AssertionError':
self.load_asserts.add(i+4) self.load_asserts.add(i+4)
jump_targets = self.find_jump_targets() jump_targets = self.find_jump_targets(False)
# contains (code, [addrRefToCode]) # contains (code, [addrRefToCode])
last_stmt = self.next_stmt[0] last_stmt = self.next_stmt[0]

View File

@@ -428,6 +428,8 @@ class Scanner3(Scanner):
# Containers filled by detect_structure() # Containers filled by detect_structure()
self.not_continue = set() self.not_continue = set()
self.return_end_ifs = set() self.return_end_ifs = set()
self.setup_loop_targets = {} # target given setup_loop offset
self.setup_loops = {} # setup_loop offset given target
targets = {} targets = {}
for offset in self.op_range(0, n): for offset in self.op_range(0, n):
@@ -585,6 +587,8 @@ class Scanner3(Scanner):
start = offset+3 start = offset+3
target = self.get_target(offset) target = self.get_target(offset)
end = self.restrict_to_parent(target, parent) end = self.restrict_to_parent(target, parent)
self.setup_loop_targets[offset] = target
self.setup_loops[target] = offset
if target != end: if target != end:
self.fixed_jumps[offset] = end self.fixed_jumps[offset] = end