# Copyright (c) 2015 by Rocky Bernstein """ Python 3.4 bytecode scanner/deparser This overlaps Python's 3.4's dis module, and in fact in some cases we just fall back to that. But the intent is that it can be run from Python 2 and other versions of Python. Also, we save token information for later use in deparsing. """ from __future__ import print_function import dis, inspect from array import array import uncompyle6.scanners.scanner3 as scan3 from uncompyle6 import PYTHON_VERSION from uncompyle6.scanner import Token # Get all the opcodes into globals globals().update(dis.opmap) import uncompyle6.opcodes.opcode_34 # verify uses JUMP_OPs from here JUMP_OPs = uncompyle6.opcodes.opcode_34.JUMP_OPs from uncompyle6.opcodes.opcode_34 import * class Scanner34(scan3.Scanner3): def disassemble(self, co, classname=None): fn = self.disassemble_built_in if PYTHON_VERSION == 3.4 \ else self.disassemble_generic return fn(co, classname) def disassemble_built_in(self, co, classname=None): # Container for tokens tokens = [] customize = {} self.code = array('B', co.co_code) self.build_lines_data(co) self.build_prev_op() # Get jump targets # Format: {target offset: [jump offsets]} jump_targets = self.find_jump_targets() bytecode = dis.Bytecode(co) # self.lines contains (block,addrLastInstr) if classname: classname = '_' + classname.lstrip('_') + '__' def unmangle(name): if name.startswith(classname) and name[-2:] != '__': return name[len(classname) - 2:] return name # free = [ unmangle(name) for name in (co.co_cellvars + co.co_freevars) ] # names = [ unmangle(name) for name in co.co_names ] # varnames = [ unmangle(name) for name in co.co_varnames ] else: # free = co.co_cellvars + co.co_freevars # names = co.co_names # varnames = co.co_varnames pass # Scan for assertions. Later we will # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT' for those # assertions self.load_asserts = set() bs = list(bytecode) n = len(bs) for i in range(n): inst = bs[i] if inst.opname == 'POP_JUMP_IF_TRUE' and i+1 < n: next_inst = bs[i+1] if (next_inst.opname == 'LOAD_GLOBAL' and next_inst.argval == 'AssertionError'): self.load_asserts.add(next_inst.offset) for inst in bytecode: if inst.offset in jump_targets: jump_idx = 0 for jump_offset in jump_targets[inst.offset]: tokens.append(Token('COME_FROM', None, repr(jump_offset), offset='%s_%s' % (inst.offset, jump_idx))) jump_idx += 1 pass pass pattr = inst.argrepr opname = inst.opname # For constants, the pattr is the same as attr. Using pattr adds # an extra level of quotes which messes other things up, like getting # keyword attribute names in a call. I suspect there will be things # other than LOAD_CONST, but we'll start out with just this for now. if opname in ['LOAD_CONST']: const = inst.argval if inspect.iscode(const): if const.co_name == '': opname = 'LOAD_LAMBDA' elif const.co_name == '': opname = 'LOAD_GENEXPR' elif const.co_name == '': opname = 'LOAD_DICTCOMP' elif const.co_name == '': opname = 'LOAD_SETCOMP' elif const.co_name == '': opname = 'LOAD_LISTCOMP' # verify() uses 'pattr' for comparison, since 'attr' # now holds Code(const) and thus can not be used # for comparison (todo: think about changing this) # pattr = 'code_object @ 0x%x %s->%s' %\ # (id(const), const.co_filename, const.co_name) pattr = '' else: pattr = const pass elif opname in ('BUILD_LIST', 'BUILD_TUPLE', 'BUILD_SET', 'BUILD_SLICE', 'UNPACK_SEQUENCE', 'MAKE_FUNCTION', 'MAKE_CLOSURE', 'DUP_TOPX', 'RAISE_VARARGS' ): # if opname == 'BUILD_TUPLE' and \ # self.code[self.prev[offset]] == LOAD_CLOSURE: # continue # else: # op_name = '%s_%d' % (op_name, oparg) # if opname != BUILD_SLICE: # customize[op_name] = oparg opname = '%s_%d' % (opname, inst.argval) if inst.opname != 'BUILD_SLICE': customize[opname] = inst.argval elif opname == 'JUMP_ABSOLUTE': pattr = inst.argval target = self.get_target(inst.offset) if target < inst.offset: if (inst.offset in self.stmts and self.code[inst.offset+3] not in (END_FINALLY, POP_BLOCK) and offset not in self.not_continue): opname = 'CONTINUE' else: opname = 'JUMP_BACK' elif inst.offset in self.load_asserts: opname = 'LOAD_ASSERT' tokens.append( Token( type_ = opname, attr = inst.argval, pattr = pattr, offset = inst.offset, linestart = inst.starts_line, ) ) pass return tokens, {} # FIXME: merge with scanner3 code def detect_structure(self, offset): """ Detect structures and their boundaries to fix optimizied jumps in python2.3+ """ code = self.code op = code[offset] # Detect parent structure parent = self.structs[0] start = parent['start'] end = parent['end'] # Pick inner-most parent for our offset for struct in self.structs: curent_start = struct['start'] curent_end = struct['end'] if (curent_start <= offset < curent_end) and (curent_start >= start and curent_end <= end): start = curent_start end = curent_end parent = struct pass if op == SETUP_EXCEPT: start = offset + 3 target = self.get_target(offset) end = self.restrict_to_parent(target, parent) if target != end: self.fixed_jumps[pos] = end # print target, end, parent # Add the try block self.structs.append({'type': 'try', 'start': start, 'end': end-4}) # Now isolate the except and else blocks end_else = start_else = self.get_target(self.prev_op[end]) # Add the except blocks i = end while self.code[i] != END_FINALLY: jmp = self.next_except_jump(i) if self.code[jmp] == RETURN_VALUE: self.structs.append({'type': 'except', 'start': i, 'end': jmp+1}) i = jmp + 1 else: if self.get_target(jmp) != start_else: end_else = self.get_target(jmp) if self.code[jmp] == JUMP_FORWARD: self.fixed_jumps[jmp] = -1 self.structs.append({'type': 'except', 'start': i, 'end': jmp}) i = jmp + 3 # Add the try-else block if end_else != start_else: r_end_else = self.restrict_to_parent(end_else, parent) self.structs.append({'type': 'try-else', 'start': i+1, 'end': r_end_else}) self.fixed_jumps[i] = r_end_else else: self.fixed_jumps[i] = i+1 elif op in (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE): start = offset + self.op_size(op) target = self.get_target(offset) rtarget = self.restrict_to_parent(target, parent) prev_op = self.prev_op # Do not let jump to go out of parent struct bounds if target != rtarget and parent['type'] == 'and/or': self.fixed_jumps[offset] = rtarget return # Does this jump to right after another cond jump? # If so, it's part of a larger conditional if (code[prev_op[target]] in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP, POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE)) and (target > offset): self.fixed_jumps[offset] = prev_op[target] self.structs.append({'type': 'and/or', 'start': start, 'end': prev_op[target]}) return # Is it an and inside if block if op == POP_JUMP_IF_FALSE: # Search for other POP_JUMP_IF_FALSE targetting the same op, # in current statement, starting from current offset, and filter # everything inside inner 'or' jumps and midline ifs match = self.rem_or(start, self.next_stmt[offset], POP_JUMP_IF_FALSE, target) match = self.remove_mid_line_ifs(match) # If we still have any offsets in set, start working on it if match: if (code[prev_op[rtarget]] in (JUMP_FORWARD, JUMP_ABSOLUTE) and prev_op[rtarget] not in self.stmts and self.restrict_to_parent(self.get_target(prev_op[rtarget]), parent) == rtarget): if (code[prev_op[prev_op[rtarget]]] == JUMP_ABSOLUTE and self.remove_mid_line_ifs([offset]) and target == self.get_target(prev_op[prev_op[rtarget]]) and (prev_op[prev_op[rtarget]] not in self.stmts or self.get_target(prev_op[prev_op[rtarget]]) > prev_op[prev_op[rtarget]]) and 1 == len(self.remove_mid_line_ifs(self.rem_or(start, prev_op[prev_op[rtarget]], (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE), target)))): pass elif (code[prev_op[prev_op[rtarget]]] == RETURN_VALUE and self.remove_mid_line_ifs([offset]) and 1 == (len(set(self.remove_mid_line_ifs(self.rem_or(start, prev_op[prev_op[rtarget]], (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE), target))) | set(self.remove_mid_line_ifs(self.rem_or(start, prev_op[prev_op[rtarget]], (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE, JUMP_ABSOLUTE), prev_op[rtarget], True)))))): pass else: fix = None jump_ifs = self.all_instr(start, self.next_stmt[offset], POP_JUMP_IF_FALSE) last_jump_good = True for j in jump_ifs: if target == self.get_target(j): if self.lines[j].next == j + 3 and last_jump_good: fix = j break else: last_jump_good = False self.fixed_jumps[offset] = fix or match[-1] return else: self.fixed_jumps[offset] = match[-1] return # op == POP_JUMP_IF_TRUE else: next = self.next_stmt[offset] if prev_op[next] == offset: pass elif code[next] in (JUMP_FORWARD, JUMP_ABSOLUTE) and target == self.get_target(next): if code[prev_op[next]] == POP_JUMP_IF_FALSE: if code[next] == JUMP_FORWARD or target != rtarget or code[prev_op[prev_op[rtarget]]] not in (JUMP_ABSOLUTE, RETURN_VALUE): self.fixed_jumps[offset] = prev_op[next] return elif (code[next] == JUMP_ABSOLUTE and code[target] in (JUMP_ABSOLUTE, JUMP_FORWARD) and self.get_target(target) == self.get_target(next)): self.fixed_jumps[offset] = prev_op[next] return # Don't add a struct for a while test, it's already taken care of if offset in self.ignore_if: return if (code[prev_op[rtarget]] == JUMP_ABSOLUTE and prev_op[rtarget] in self.stmts and prev_op[rtarget] != offset and prev_op[prev_op[rtarget]] != offset and not (code[rtarget] == JUMP_ABSOLUTE and code[rtarget+3] == POP_BLOCK and code[prev_op[prev_op[rtarget]]] != JUMP_ABSOLUTE)): rtarget = prev_op[rtarget] # Does the if jump just beyond a jump op, then this is probably an if statement if code[prev_op[rtarget]] in (JUMP_ABSOLUTE, JUMP_FORWARD): if_end = self.get_target(prev_op[rtarget]) # Is this a loop not an if? if (if_end < prev_op[rtarget]) and (code[prev_op[if_end]] == SETUP_LOOP): if(if_end > start): return end = self.restrict_to_parent(if_end, parent) self.structs.append({'type': 'if-then', 'start': start, 'end': prev_op[rtarget]}) self.not_continue.add(prev_op[rtarget]) if rtarget < end: self.structs.append({'type': 'if-else', 'start': rtarget, 'end': end}) elif code[prev_op[rtarget]] == RETURN_VALUE: self.structs.append({'type': 'if-then', 'start': start, 'end': rtarget}) self.return_end_ifs.add(prev_op[rtarget]) elif op in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP): target = self.get_target(offset) if target > offset: unop_target = self.last_instr(offset, target, JUMP_FORWARD, target) if unop_target and code[unop_target+3] != ROT_TWO: self.fixed_jumps[offset] = unop_target else: self.fixed_jumps[offset] = self.restrict_to_parent(target, parent) def next_except_jump(self, start): """ Return the next jump that was generated by an except SomeException: construct in a try...except...else clause or None if not found. """ if self.code[start] == DUP_TOP: except_match = self.first_instr(start, len(self.code), POP_JUMP_IF_FALSE) if except_match: jmp = self.prev_op[self.get_target(except_match)] self.ignore_if.add(except_match) self.not_continue.add(jmp) return jmp count_END_FINALLY = 0 count_SETUP_ = 0 for i in self.op_range(start, len(self.code)): op = self.code[i] if op == END_FINALLY: if count_END_FINALLY == count_SETUP_: assert self.code[self.prev_op[i]] in (JUMP_ABSOLUTE, JUMP_FORWARD, RETURN_VALUE) self.not_continue.add(self.prev_op[i]) return self.prev_op[i] count_END_FINALLY += 1 elif op in (SETUP_EXCEPT, SETUP_WITH, SETUP_FINALLY): count_SETUP_ += 1 if __name__ == "__main__": co = inspect.currentframe().f_code tokens, customize = Scanner34().disassemble(co) for t in tokens: print(t) pass