diff --git a/__pkginfo__.py b/__pkginfo__.py index 9442bbf0..1b691d77 100644 --- a/__pkginfo__.py +++ b/__pkginfo__.py @@ -37,7 +37,7 @@ entry_points={ ]} ftp_url = None install_requires = ['spark-parser >= 1.2.1', - 'xdis >= 1.0.2'] + 'xdis >= 1.0.4'] license = 'MIT' mailing_list = 'python-debugger@googlegroups.com' modname = 'uncompyle6' diff --git a/uncompyle6/opcodes/opcode_27.py b/uncompyle6/opcodes/opcode_27.py deleted file mode 100755 index b7bc72ef..00000000 --- a/uncompyle6/opcodes/opcode_27.py +++ /dev/null @@ -1,219 +0,0 @@ -""" -CPython 2.7 bytecode opcodes - -This is used in scanner (bytecode disassembly) and parser (Python grammar). - -This is a superset of Python 3.4's opcode.py with some opcodes that simplify -parsing and semantic interpretation. -""" - -# FIXME: DRY this along the lines of opcode_3x. - -cmp_op = ('<', '<=', '==', '!=', '>', '>=', 'in', 'not in', 'is', - 'is not', 'exception match', 'BAD') - -hasconst = [] -hasname = [] -hasjrel = [] -hasjabs = [] -haslocal = [] -hascompare = [] -hasfree = [] -hasArgumentExtended = [] -PJIF = PJIT = JA = JF = 0 - -opmap = {} -opname = [''] * 256 -for op in range(256): opname[op] = '<%r>' % (op,) -del op - -def def_op(name, op): - opname[op] = name - opmap[name] = op - globals().update({name: op}) - -def name_op(name, op): - def_op(name, op) - hasname.append(op) - -def jrel_op(name, op): - def_op(name, op) - hasjrel.append(op) - -def jabs_op(name, op): - def_op(name, op) - hasjabs.append(op) - -def updateGlobal(): - globals().update({'PJIF': opmap['POP_JUMP_IF_FALSE']}) - globals().update({'PJIT': opmap['POP_JUMP_IF_TRUE']}) - globals().update({'JA': opmap['JUMP_ABSOLUTE']}) - globals().update({'JF': opmap['JUMP_FORWARD']}) - globals().update(dict([(k.replace('+', '_'), v) for (k, v) in opmap.items()])) - globals().update({'JUMP_OPs': map(lambda op: opname[op], hasjrel + hasjabs)}) - -# Instruction opcodes for compiled code -# Blank lines correspond to available opcodes - -def_op('STOP_CODE', 0) -def_op('POP_TOP', 1) -def_op('ROT_TWO', 2) -def_op('ROT_THREE', 3) -def_op('DUP_TOP', 4) -def_op('ROT_FOUR', 5) - -def_op('NOP', 9) -def_op('UNARY_POSITIVE', 10) -def_op('UNARY_NEGATIVE', 11) -def_op('UNARY_NOT', 12) -def_op('UNARY_CONVERT', 13) - -def_op('UNARY_INVERT', 15) - -def_op('BINARY_POWER', 19) -def_op('BINARY_MULTIPLY', 20) -def_op('BINARY_DIVIDE', 21) -def_op('BINARY_MODULO', 22) -def_op('BINARY_ADD', 23) -def_op('BINARY_SUBTRACT', 24) -def_op('BINARY_SUBSCR', 25) -def_op('BINARY_FLOOR_DIVIDE', 26) -def_op('BINARY_TRUE_DIVIDE', 27) -def_op('INPLACE_FLOOR_DIVIDE', 28) -def_op('INPLACE_TRUE_DIVIDE', 29) -def_op('SLICE+0', 30) -def_op('SLICE+1', 31) -def_op('SLICE+2', 32) -def_op('SLICE+3', 33) - -def_op('STORE_SLICE+0', 40) -def_op('STORE_SLICE+1', 41) -def_op('STORE_SLICE+2', 42) -def_op('STORE_SLICE+3', 43) - -def_op('DELETE_SLICE+0', 50) -def_op('DELETE_SLICE+1', 51) -def_op('DELETE_SLICE+2', 52) -def_op('DELETE_SLICE+3', 53) - -def_op('STORE_MAP', 54) -def_op('INPLACE_ADD', 55) -def_op('INPLACE_SUBTRACT', 56) -def_op('INPLACE_MULTIPLY', 57) -def_op('INPLACE_DIVIDE', 58) -def_op('INPLACE_MODULO', 59) -def_op('STORE_SUBSCR', 60) -def_op('DELETE_SUBSCR', 61) -def_op('BINARY_LSHIFT', 62) -def_op('BINARY_RSHIFT', 63) -def_op('BINARY_AND', 64) -def_op('BINARY_XOR', 65) -def_op('BINARY_OR', 66) -def_op('INPLACE_POWER', 67) -def_op('GET_ITER', 68) - -def_op('PRINT_EXPR', 70) -def_op('PRINT_ITEM', 71) -def_op('PRINT_NEWLINE', 72) -def_op('PRINT_ITEM_TO', 73) -def_op('PRINT_NEWLINE_TO', 74) -def_op('INPLACE_LSHIFT', 75) -def_op('INPLACE_RSHIFT', 76) -def_op('INPLACE_AND', 77) -def_op('INPLACE_XOR', 78) -def_op('INPLACE_OR', 79) -def_op('BREAK_LOOP', 80) -def_op('WITH_CLEANUP', 81) -def_op('LOAD_LOCALS', 82) -def_op('RETURN_VALUE', 83) -def_op('IMPORT_STAR', 84) -def_op('EXEC_STMT', 85) -def_op('YIELD_VALUE', 86) -def_op('POP_BLOCK', 87) -def_op('END_FINALLY', 88) -def_op('BUILD_CLASS', 89) - -HAVE_ARGUMENT = 90 # Opcodes from here have an argument: - -name_op('STORE_NAME', 90) # Index in name list -name_op('DELETE_NAME', 91) # "" -def_op('UNPACK_SEQUENCE', 92) # Number of tuple items -jrel_op('FOR_ITER', 93) -def_op('LIST_APPEND', 94) - -name_op('STORE_ATTR', 95) # Index in name list -name_op('DELETE_ATTR', 96) # "" -name_op('STORE_GLOBAL', 97) # "" -name_op('DELETE_GLOBAL', 98) # "" -def_op('DUP_TOPX', 99) # number of items to duplicate -def_op('LOAD_CONST', 100) # Index in const list -hasconst.append(100) -name_op('LOAD_NAME', 101) # Index in name list -def_op('BUILD_TUPLE', 102) # Number of tuple items -def_op('BUILD_LIST', 103) # Number of list items -def_op('BUILD_SET', 104) # Number of set items -def_op('BUILD_MAP', 105) # Number of dict entries (upto 255) -name_op('LOAD_ATTR', 106) # Index in name list -def_op('COMPARE_OP', 107) # Comparison operator -hascompare.append(107) -name_op('IMPORT_NAME', 108) # Index in name list -name_op('IMPORT_FROM', 109) # Index in name list -jrel_op('JUMP_FORWARD', 110) # Number of bytes to skip -jabs_op('JUMP_IF_FALSE_OR_POP', 111) # Target byte offset from beginning of code -jabs_op('JUMP_IF_TRUE_OR_POP', 112) # "" -jabs_op('JUMP_ABSOLUTE', 113) # "" -jabs_op('POP_JUMP_IF_FALSE', 114) # "" -jabs_op('POP_JUMP_IF_TRUE', 115) # "" - -name_op('LOAD_GLOBAL', 116) # Index in name list - -jabs_op('CONTINUE_LOOP', 119) # Target address -jrel_op('SETUP_LOOP', 120) # Distance to target address -jrel_op('SETUP_EXCEPT', 121) # "" -jrel_op('SETUP_FINALLY', 122) # "" - -def_op('LOAD_FAST', 124) # Local variable number -haslocal.append(124) -def_op('STORE_FAST', 125) # Local variable number -haslocal.append(125) -def_op('DELETE_FAST', 126) # Local variable number -haslocal.append(126) - -def_op('RAISE_VARARGS', 130) # Number of raise arguments (1, 2, or 3) -def_op('CALL_FUNCTION', 131) # #args + (#kwargs << 8) -def_op('MAKE_FUNCTION', 132) # Number of args with default values -def_op('BUILD_SLICE', 133) # Number of items -def_op('MAKE_CLOSURE', 134) -def_op('LOAD_CLOSURE', 135) -hasfree.append(135) -def_op('LOAD_DEREF', 136) -hasfree.append(136) -def_op('STORE_DEREF', 137) -hasfree.append(137) - -def_op('CALL_FUNCTION_VAR', 140) # #args + (#kwargs << 8) -def_op('CALL_FUNCTION_KW', 141) # #args + (#kwargs << 8) -def_op('CALL_FUNCTION_VAR_KW', 142) # #args + (#kwargs << 8) - -jrel_op('SETUP_WITH', 143) - -def_op('EXTENDED_ARG', 145) -EXTENDED_ARG = 145 -def_op('SET_ADD', 146) -def_op('MAP_ADD', 147) - -# PyPy magic opcodes -# FIXME: see if we can conditionally add them -def_op('LOOKUP_METHOD', 201) -def_op('CALL_METHOD', 202) -def_op('BUILD_LIST_FROM_ARG', 203) -def_op('JUMP_IF_NOT_DEBUG', 204) - -updateGlobal() -del def_op, name_op, jrel_op, jabs_op - -from uncompyle6 import PYTHON_VERSION -if PYTHON_VERSION == 2.7: - import dis - # print(set(dis.opmap.items()) - set(opmap.items())) - assert all(item in opmap.items() for item in dis.opmap.items()) diff --git a/uncompyle6/scanner.py b/uncompyle6/scanner.py index 8e780308..f15cb5aa 100755 --- a/uncompyle6/scanner.py +++ b/uncompyle6/scanner.py @@ -30,9 +30,10 @@ if PYTHON3: else: L65536 = long(65536) # NOQA -from uncompyle6.opcodes import (opcode_25, opcode_26, opcode_27) +from uncompyle6.opcodes import (opcode_25, opcode_26) -from xdis.opcodes import (opcode_32, opcode_33, opcode_34, opcode_35) +from xdis.opcodes import (opcode_27, + opcode_32, opcode_33, opcode_34, opcode_35) class Code(object): @@ -212,16 +213,6 @@ class Scanner(object): result.append(offset) return result - def op_size(self, op): - """ - Return size of operator with its arguments - for given opcode . - """ - if op < self.opc.HAVE_ARGUMENT and op not in self.opc.hasArgumentExtended: - return 1 - else: - return 3 - def op_hasArgument(self, op): return self.op_size(op) > 1 diff --git a/uncompyle6/scanners/scanner2.py b/uncompyle6/scanners/scanner2.py new file mode 100755 index 00000000..384c2202 --- /dev/null +++ b/uncompyle6/scanners/scanner2.py @@ -0,0 +1,772 @@ +# Copyright (c) 2015, 2016 by Rocky Bernstein +# Copyright (c) 2005 by Dan Pascu +# Copyright (c) 2000-2002 by hartmut Goebel +""" +Python 2 Generic bytecode scanner/deparser + +This overlaps various Python3's dis module, but it can be run from +Python versions other than the version running this code. Notably, +run from Python version 2. + +Also we *modify* the instruction sequence to assist deparsing code. +For example: + - we add "COME_FROM" instructions to help in figuring out + conditional branching and looping. + - LOAD_CONSTs are classified further into the type of thing + they load: + lambda's, genexpr's, {dict,set,list} comprehension's, + - PARAMETER counts appended {CALL,MAKE}_FUNCTION, BUILD_{TUPLE,SET,SLICE} + +Finally we save token information. +""" + + +from __future__ import print_function + +import dis, inspect +from collections import namedtuple +from array import array + +from xdis.code import iscode + +# FIXME: remove +from xdis.opcodes.opcode_27 import * # NOQA + +import uncompyle6.scanner as scan + +class Scanner2(scan.Scanner): + def __init__(self, version): + scan.Scanner.__init__(self, version) + + def disassemble(self, co, classname=None, code_objects={}): + """ + Disassemble a Python 2 code object, returning a list of 'Token'. + Various tranformations are made to assist the deparsing grammar. + For example: + - various types of LOAD_CONST's are categorized in terms of what they load + - COME_FROM instructions are added to assist parsing control structures + - MAKE_FUNCTION and FUNCTION_CALLS append the number of positional aruments + The main part of this procedure is modelled after + dis.disassemble(). + """ + + # import dis; dis.disassemble(co) # DEBUG + + # Container for tokens + tokens = [] + + customize = {} + Token = self.Token # shortcut + + n = self.setup_code(co) + self.build_lines_data(co, n) + self.build_prev_op(n) + + # self.lines contains (block,addrLastInstr) + if classname: + classname = '_' + classname.lstrip('_') + '__' + + def unmangle(name): + if name.startswith(classname) and name[-2:] != '__': + return name[len(classname) - 2:] + return name + + free = [ unmangle(name) for name in (co.co_cellvars + co.co_freevars) ] + names = [ unmangle(name) for name in co.co_names ] + varnames = [ unmangle(name) for name in co.co_varnames ] + else: + free = co.co_cellvars + co.co_freevars + names = co.co_names + varnames = co.co_varnames + + self.load_asserts = set() + for i in self.op_range(0, n): + if self.code[i] == self.opc.PJIT and self.code[i+3] == self.opc.LOAD_GLOBAL: + if names[self.get_argument(i+3)] == 'AssertionError': + self.load_asserts.add(i+3) + + cf = self.find_jump_targets() + # contains (code, [addrRefToCode]) + last_stmt = self.next_stmt[0] + i = self.next_stmt[last_stmt] + replace = {} + while i < n-1: + if self.lines[last_stmt].next > i: + if self.code[last_stmt] == self.opc.PRINT_ITEM: + if self.code[i] == self.opc.PRINT_ITEM: + replace[i] = 'PRINT_ITEM_CONT' + elif self.code[i] == self.opc.PRINT_NEWLINE: + replace[i] = 'PRINT_NEWLINE_CONT' + last_stmt = i + i = self.next_stmt[i] + + imports = self.all_instr(0, n, (self.opc.IMPORT_NAME, self.opc.IMPORT_FROM, + self.opc.IMPORT_STAR)) + if len(imports) > 1: + last_import = imports[0] + for i in imports[1:]: + if self.lines[last_import].next > i: + if self.code[last_import] == self.opc.IMPORT_NAME == self.code[i]: + replace[i] = 'IMPORT_NAME_CONT' + last_import = i + + extended_arg = 0 + for offset in self.op_range(0, n): + if offset in cf: + k = 0 + for j in cf[offset]: + tokens.append(Token('COME_FROM', None, repr(j), + offset="%s_%d" % (offset, k))) + k += 1 + + op = self.code[offset] + op_name = self.opc.opname[op] + + oparg = None; pattr = None + if op >= self.opc.HAVE_ARGUMENT: + oparg = self.get_argument(offset) + extended_arg + extended_arg = 0 + if op == self.opc.EXTENDED_ARG: + extended_arg = oparg * scan.L65536 + continue + if op in self.opc.hasconst: + const = co.co_consts[oparg] + if iscode(const): + oparg = const + if const.co_name == '': + assert op_name == 'LOAD_CONST' + op_name = 'LOAD_LAMBDA' + elif const.co_name == '': + op_name = 'LOAD_GENEXPR' + elif const.co_name == '': + op_name = 'LOAD_DICTCOMP' + elif const.co_name == '': + op_name = 'LOAD_SETCOMP' + # verify() uses 'pattr' for comparison, since 'attr' + # now holds Code(const) and thus can not be used + # for comparison (todo: think about changing this) + # pattr = 'code_object @ 0x%x %s->%s' %\ + # (id(const), const.co_filename, const.co_name) + pattr = '' + else: + pattr = const + elif op in self.opc.hasname: + pattr = names[oparg] + elif op in self.opc.hasjrel: + pattr = repr(offset + 3 + oparg) + elif op in self.opc.hasjabs: + pattr = repr(oparg) + elif op in self.opc.haslocal: + pattr = varnames[oparg] + elif op in self.opc.hascompare: + pattr = self.opc.cmp_op[oparg] + elif op in self.opc.hasfree: + pattr = free[oparg] + + if op in (BUILD_LIST, BUILD_TUPLE, BUILD_SET, BUILD_SLICE, + UNPACK_SEQUENCE, + MAKE_FUNCTION, CALL_FUNCTION, MAKE_CLOSURE, + CALL_FUNCTION_VAR, CALL_FUNCTION_KW, + CALL_FUNCTION_VAR_KW, DUP_TOPX, RAISE_VARARGS + ): + # CE - Hack for >= 2.5 + # Now all values loaded via LOAD_CLOSURE are packed into + # a tuple before calling MAKE_CLOSURE. + if op == BUILD_TUPLE and \ + self.code[self.prev[offset]] == LOAD_CLOSURE: + continue + else: + op_name = '%s_%d' % (op_name, oparg) + if op != BUILD_SLICE: + customize[op_name] = oparg + elif op == JA: + target = self.get_target(offset) + if target < offset: + if offset in self.stmts and self.code[offset+3] not in (END_FINALLY, POP_BLOCK) \ + and offset not in self.not_continue: + op_name = 'CONTINUE' + else: + op_name = 'JUMP_BACK' + + elif op == LOAD_GLOBAL: + if offset in self.load_asserts: + op_name = 'LOAD_ASSERT' + elif op == RETURN_VALUE: + if offset in self.return_end_ifs: + op_name = 'RETURN_END_IF' + + if offset in self.linestartoffsets: + linestart = self.linestartoffsets[offset] + else: + linestart = None + + if offset not in replace: + tokens.append(Token(op_name, oparg, pattr, offset, linestart)) + else: + tokens.append(Token(replace[offset], oparg, pattr, offset, linestart)) + return tokens, customize + + def disassemble_native(self, co, classname=None, code_objects={}): + """ + Like disassemble3 but doesn't try to adjust any opcodes. + """ + + # Container for tokens + tokens = [] + + customize = {} + Token = self.Token # shortcut + + n = self.setup_code(co) + self.build_lines_data(co, n) + + # self.lines contains (block,addrLastInstr) + if classname: + classname = '_' + classname.lstrip('_') + '__' + + def unmangle(name): + if name.startswith(classname) and name[-2:] != '__': + return name[len(classname) - 2:] + return name + + free = [ unmangle(name) for name in (co.co_cellvars + co.co_freevars) ] + names = [ unmangle(name) for name in co.co_names ] + varnames = [ unmangle(name) for name in co.co_varnames ] + else: + free = co.co_cellvars + co.co_freevars + names = co.co_names + varnames = co.co_varnames + + extended_arg = 0 + for offset in self.op_range(0, n): + op = self.code[offset] + op_name = self.opc.opname[op] + + oparg = None; pattr = None + if op >= HAVE_ARGUMENT: + oparg = self.get_argument(offset) + extended_arg + extended_arg = 0 + if op == EXTENDED_ARG: + extended_arg = oparg * scan.L65536 + continue + if op in hasconst: + pattr = co.co_consts[oparg] + elif op in hasname: + pattr = names[oparg] + elif op in hasjrel: + pattr = repr(offset + 3 + oparg) + elif op in hasjabs: + pattr = repr(oparg) + elif op in haslocal: + pattr = varnames[oparg] + elif op in hascompare: + pattr = cmp_op[oparg] + elif op in hasfree: + pattr = free[oparg] + + if offset in self.linestartoffsets: + linestart = self.linestartoffsets[offset] + else: + linestart = None + + tokens.append(Token(op_name, oparg, pattr, offset, linestart)) + pass + return tokens, customize + + def op_size(self, op): + """ + Return size of operator with its arguments + for given opcode . + """ + if op < self.opc.HAVE_ARGUMENT and op not in self.opc.hasArgumentExtended: + return 1 + else: + return 3 + + def setup_code(self, co): + """ + Creates Python-independent bytecode structure (byte array) in + self.code and records previous instruction in self.prev + The size of self.code is returned + """ + self.code = array('B', co.co_code) + + n = -1 + for i in self.op_range(0, len(self.code)): + if self.code[i] in (self.opc.RETURN_VALUE, self.opc.END_FINALLY): + n = i + 1 + pass + pass + assert n > -1, "Didn't find RETURN_VALUE or END_FINALLY FINALLY" + self.code = array('B', co.co_code[:n]) + + return n + + def build_prev_op(self, n): + self.prev = [0] + # mapping addresses of instruction & argument + for i in self.op_range(0, n): + op = self.code[i] + self.prev.append(i) + if op >= HAVE_ARGUMENT: + self.prev.append(i) + self.prev.append(i) + pass + pass + + def build_lines_data(self, co, n): + """ + Initializes self.lines and self.linesstartoffsets + """ + self.lines = [] + linetuple = namedtuple('linetuple', ['l_no', 'next']) + + # linestarts is a tuple of (offset, line number). + # Turn that in a has that we can index + linestarts = list(dis.findlinestarts(co)) + self.linestartoffsets = {} + for offset, lineno in linestarts: + self.linestartoffsets[offset] = lineno + + j = 0 + (prev_start_byte, prev_line_no) = linestarts[0] + for (start_byte, line_no) in linestarts[1:]: + while j < start_byte: + self.lines.append(linetuple(prev_line_no, start_byte)) + j += 1 + prev_line_no = start_byte + while j < n: + self.lines.append(linetuple(prev_line_no, n)) + j+=1 + return + + def build_stmt_indices(self): + code = self.code + start = 0 + end = len(code) + + stmt_opcodes = set([ + self.opc.SETUP_LOOP, self.opc.BREAK_LOOP, self.opc.CONTINUE_LOOP, + self.opc.SETUP_FINALLY, self.opc.END_FINALLY, + self.opc.SETUP_EXCEPT, self.opc.SETUP_WITH, + self.opc.POP_BLOCK, self.opc.STORE_FAST, self.opc.DELETE_FAST, + self.opc.STORE_DEREF, self.opc.STORE_GLOBAL, + self.opc.DELETE_GLOBAL, self.opc.STORE_NAME, + self.opc.DELETE_NAME, self.opc.STORE_ATTR, + self.opc.DELETE_ATTR, + ## FIXME keep going. + STORE_SUBSCR, DELETE_SUBSCR, + RETURN_VALUE, RAISE_VARARGS, POP_TOP, + PRINT_EXPR, PRINT_ITEM, PRINT_NEWLINE, PRINT_ITEM_TO, PRINT_NEWLINE_TO, + STORE_SLICE_0, STORE_SLICE_1, STORE_SLICE_2, STORE_SLICE_3, + DELETE_SLICE_0, DELETE_SLICE_1, DELETE_SLICE_2, DELETE_SLICE_3, + JUMP_ABSOLUTE, EXEC_STMT, + ]) + + stmt_opcode_seqs = [(self.opc.PJIF, self.opc.JF), + (self.opc.PJIF, self.opc.JA), + (self.opc.PJIT, self.opc.JF), + (self.opc.PJIT, self.opc.JA)] + + designator_ops = set([ + self.opc.STORE_FAST, STORE_NAME, STORE_GLOBAL, STORE_DEREF, STORE_ATTR, + STORE_SLICE_0, STORE_SLICE_1, STORE_SLICE_2, STORE_SLICE_3, + STORE_SUBSCR, UNPACK_SEQUENCE, JA + ]) + + prelim = self.all_instr(start, end, stmt_opcodes) + + stmts = self.stmts = set(prelim) + pass_stmts = set() + for seq in stmt_opcode_seqs: + for i in self.op_range(start, end-(len(seq)+1)): + match = True + for elem in seq: + if elem != code[i]: + match = False + break + i += self.op_size(code[i]) + + if match: + i = self.prev[i] + stmts.add(i) + pass_stmts.add(i) + + if pass_stmts: + stmt_list = list(stmts) + stmt_list.sort() + else: + stmt_list = prelim + last_stmt = -1 + self.next_stmt = [] + slist = self.next_stmt = [] + i = 0 + for s in stmt_list: + if code[s] == JA and s not in pass_stmts: + target = self.get_target(s) + if target > s or self.lines[last_stmt].l_no == self.lines[s].l_no: + stmts.remove(s) + continue + j = self.prev[s] + while code[j] == JA: + j = self.prev[j] + if code[j] == self.opc.LIST_APPEND: # list comprehension + stmts.remove(s) + continue + elif code[s] == self.opc.POP_TOP and code[self.prev[s]] == self.opc.ROT_TWO: + stmts.remove(s) + continue + elif code[s] in designator_ops: + j = self.prev[s] + while code[j] in designator_ops: + j = self.prev[j] + if code[j] == self.opc.FOR_ITER: + stmts.remove(s) + continue + last_stmt = s + slist += [s] * (s-i) + i = s + slist += [end] * (end-len(slist)) + + def next_except_jump(self, start): + ''' + Return the next jump that was generated by an except SomeException: + construct in a try...except...else clause or None if not found. + ''' + + if self.code[start] == DUP_TOP: + except_match = self.first_instr(start, len(self.code), POP_JUMP_IF_FALSE) + if except_match: + jmp = self.prev[self.get_target(except_match)] + self.ignore_if.add(except_match) + self.not_continue.add(jmp) + return jmp + + count_END_FINALLY = 0 + count_SETUP_ = 0 + for i in self.op_range(start, len(self.code)): + op = self.code[i] + if op == END_FINALLY: + if count_END_FINALLY == count_SETUP_: + assert self.code[self.prev[i]] in (JA, JF, RETURN_VALUE) + self.not_continue.add(self.prev[i]) + return self.prev[i] + count_END_FINALLY += 1 + elif op in (SETUP_EXCEPT, SETUP_WITH, SETUP_FINALLY): + count_SETUP_ += 1 + + def detect_structure(self, pos, op=None): + ''' + Detect type of block structures and their boundaries to fix optimized jumps + in python2.3+ + ''' + + # TODO: check the struct boundaries more precisely -Dan + + code = self.code + # Ev remove this test and make op a mandatory argument -Dan + if op is None: + op = code[pos] + + # Detect parent structure + parent = self.structs[0] + start = parent['start'] + end = parent['end'] + for s in self.structs: + _start = s['start'] + _end = s['end'] + if (_start <= pos < _end) and (_start >= start and _end <= end): + start = _start + end = _end + parent = s + + if op == SETUP_LOOP: + start = pos+3 + target = self.get_target(pos, op) + end = self.restrict_to_parent(target, parent) + + if target != end: + self.fixed_jumps[pos] = end + (line_no, next_line_byte) = self.lines[pos] + jump_back = self.last_instr(start, end, JA, + next_line_byte, False) + + if jump_back and jump_back != self.prev[end] and code[jump_back+3] in (JA, JF): + if code[self.prev[end]] == RETURN_VALUE or \ + (code[self.prev[end]] == POP_BLOCK and code[self.prev[self.prev[end]]] == RETURN_VALUE): + jump_back = None + if not jump_back: # loop suite ends in return. wtf right? + jump_back = self.last_instr(start, end, RETURN_VALUE) + 1 + if not jump_back: + return + if code[self.prev[next_line_byte]] not in (PJIF, PJIT): + loop_type = 'for' + else: + loop_type = 'while' + self.ignore_if.add(self.prev[next_line_byte]) + target = next_line_byte + end = jump_back + 3 + else: + if self.get_target(jump_back) >= next_line_byte: + jump_back = self.last_instr(start, end, JA, start, False) + if end > jump_back+4 and code[end] in (JF, JA): + if code[jump_back+4] in (JA, JF): + if self.get_target(jump_back+4) == self.get_target(end): + self.fixed_jumps[pos] = jump_back+4 + end = jump_back+4 + elif target < pos: + self.fixed_jumps[pos] = jump_back+4 + end = jump_back+4 + target = self.get_target(jump_back, JA) + + if code[target] in (FOR_ITER, GET_ITER): + loop_type = 'for' + else: + loop_type = 'while' + test = self.prev[next_line_byte] + if test == pos: + loop_type = 'while 1' + elif self.code[test] in hasjabs+hasjrel: + self.ignore_if.add(test) + test_target = self.get_target(test) + if test_target > (jump_back+3): + jump_back = test_target + self.not_continue.add(jump_back) + self.loops.append(target) + self.structs.append({'type': loop_type + '-loop', + 'start': target, + 'end': jump_back}) + if jump_back+3 != end: + self.structs.append({'type': loop_type + '-else', + 'start': jump_back+3, + 'end': end}) + elif op == SETUP_EXCEPT: + start = pos+3 + target = self.get_target(pos, op) + end = self.restrict_to_parent(target, parent) + if target != end: + self.fixed_jumps[pos] = end + # print target, end, parent + # Add the try block + self.structs.append({'type': 'try', + 'start': start, + 'end': end-4}) + # Now isolate the except and else blocks + end_else = start_else = self.get_target(self.prev[end]) + + # Add the except blocks + i = end + while self.code[i] != END_FINALLY: + jmp = self.next_except_jump(i) + if self.code[jmp] == RETURN_VALUE: + self.structs.append({'type': 'except', + 'start': i, + 'end': jmp+1}) + i = jmp + 1 + else: + if self.get_target(jmp) != start_else: + end_else = self.get_target(jmp) + if self.code[jmp] == JF: + self.fixed_jumps[jmp] = -1 + self.structs.append({'type': 'except', + 'start': i, + 'end': jmp}) + i = jmp + 3 + + # Add the try-else block + if end_else != start_else: + r_end_else = self.restrict_to_parent(end_else, parent) + self.structs.append({'type': 'try-else', + 'start': i+1, + 'end': r_end_else}) + self.fixed_jumps[i] = r_end_else + else: + self.fixed_jumps[i] = i+1 + + elif op in (PJIF, PJIT): + start = pos+3 + target = self.get_target(pos, op) + rtarget = self.restrict_to_parent(target, parent) + pre = self.prev + + if target != rtarget and parent['type'] == 'and/or': + self.fixed_jumps[pos] = rtarget + return + # does this jump to right after another cond jump? + # if so, it's part of a larger conditional + if (code[pre[target]] in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP, + PJIF, PJIT)) and (target > pos): + self.fixed_jumps[pos] = pre[target] + self.structs.append({'type': 'and/or', + 'start': start, + 'end': pre[target]}) + return + + # is this an if and + if op == PJIF: + match = self.rem_or(start, self.next_stmt[pos], PJIF, target) + match = self.remove_mid_line_ifs(match) + + if match: + if code[pre[rtarget]] in (JF, JA) \ + and pre[rtarget] not in self.stmts \ + and self.restrict_to_parent(self.get_target(pre[rtarget]), parent) == rtarget: + if code[pre[pre[rtarget]]] == JA \ + and self.remove_mid_line_ifs([pos]) \ + and target == self.get_target(pre[pre[rtarget]]) \ + and (pre[pre[rtarget]] not in self.stmts or self.get_target(pre[pre[rtarget]]) > pre[pre[rtarget]])\ + and 1 == len(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], (PJIF, PJIT), target))): + pass + elif code[pre[pre[rtarget]]] == RETURN_VALUE \ + and self.remove_mid_line_ifs([pos]) \ + and 1 == (len(set(self.remove_mid_line_ifs(self.rem_or(start, + pre[pre[rtarget]], + (PJIF, PJIT), target))) + | set(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], + (PJIF, PJIT, JA), pre[rtarget], True))))): + pass + else: + fix = None + jump_ifs = self.all_instr(start, self.next_stmt[pos], PJIF) + last_jump_good = True + for j in jump_ifs: + if target == self.get_target(j): + if self.lines[j].next == j+3 and last_jump_good: + fix = j + break + else: + last_jump_good = False + self.fixed_jumps[pos] = fix or match[-1] + return + else: + self.fixed_jumps[pos] = match[-1] + return + else: # op == PJIT + if (pos+3) in self.load_asserts: + if code[pre[rtarget]] == RAISE_VARARGS: + return + self.load_asserts.remove(pos+3) + + next = self.next_stmt[pos] + if pre[next] == pos: + pass + elif code[next] in (JF, JA) and target == self.get_target(next): + if code[pre[next]] == PJIF: + if code[next] == JF or target != rtarget or code[pre[pre[rtarget]]] not in (JA, RETURN_VALUE): + self.fixed_jumps[pos] = pre[next] + return + elif code[next] == JA and code[target] in (JA, JF): + next_target = self.get_target(next) + if self.get_target(target) == next_target: + self.fixed_jumps[pos] = pre[next] + return + elif code[next_target] in (JA, JF) and self.get_target(next_target) == self.get_target(target): + self.fixed_jumps[pos] = pre[next] + return + + # don't add a struct for a while test, it's already taken care of + if pos in self.ignore_if: + return + + if code[pre[rtarget]] == JA and pre[rtarget] in self.stmts \ + and pre[rtarget] != pos and pre[pre[rtarget]] != pos: + if code[rtarget] == JA and code[rtarget+3] == POP_BLOCK: + if code[pre[pre[rtarget]]] != JA: + pass + elif self.get_target(pre[pre[rtarget]]) != target: + pass + else: + rtarget = pre[rtarget] + else: + rtarget = pre[rtarget] + # does the if jump just beyond a jump op, then this is probably an if statement + if code[pre[rtarget]] in (JA, JF): + if_end = self.get_target(pre[rtarget]) + + # is this a loop not an if? + if (if_end < pre[rtarget]) and (code[pre[if_end]] == SETUP_LOOP): + if(if_end > start): + return + + end = self.restrict_to_parent(if_end, parent) + + self.structs.append({'type': 'if-then', + 'start': start, + 'end': pre[rtarget]}) + self.not_continue.add(pre[rtarget]) + + if rtarget < end: + self.structs.append({'type': 'if-else', + 'start': rtarget, + 'end': end}) + elif code[pre[rtarget]] == RETURN_VALUE: + self.structs.append({'type': 'if-then', + 'start': start, + 'end': rtarget}) + self.return_end_ifs.add(pre[rtarget]) + + elif op in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP): + target = self.get_target(pos, op) + self.fixed_jumps[pos] = self.restrict_to_parent(target, parent) + + def find_jump_targets(self): + ''' + Detect all offsets in a byte code which are jump targets. + + Return the list of offsets. + + This procedure is modelled after dis.findlabels(), but here + for each target the number of jumps are counted. + ''' + + n = len(self.code) + self.structs = [{'type': 'root', + 'start': 0, + 'end': n-1}] + self.loops = [] # All loop entry points + self.fixed_jumps = {} # Map fixed jumps to their real destination + self.ignore_if = set() + self.build_stmt_indices() + + # Containers filled by detect_structure() + self.not_continue = set() + self.return_end_ifs = set() + + targets = {} + for i in self.op_range(0, n): + op = self.code[i] + + # Determine structures and fix jumps in Python versions + # since 2.3 + self.detect_structure(i, op) + + if op >= HAVE_ARGUMENT: + label = self.fixed_jumps.get(i) + oparg = self.code[i+1] + self.code[i+2] * 256 + if label is None: + if op in hasjrel and op != FOR_ITER: + label = i + 3 + oparg + elif op in hasjabs: + if op in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP): + if (oparg > i): + label = oparg + + if label is not None and label != -1: + targets[label] = targets.get(label, []) + [i] + elif op == END_FINALLY and i in self.fixed_jumps: + label = self.fixed_jumps[i] + targets[label] = targets.get(label, []) + [i] + return targets + +if __name__ == "__main__": + from uncompyle6 import PYTHON_VERSION + if PYTHON_VERSION >= 2.3: + co = inspect.currentframe().f_code + from uncompyle6 import PYTHON_VERSION + tokens, customize = Scanner2(PYTHON_VERSION).disassemble(co) + for t in tokens: + print(t.format()) + else: + print("Need to be Python 3.2 or greater to demo; I am %s." % + PYTHON_VERSION) + pass diff --git a/uncompyle6/scanners/scanner25.py b/uncompyle6/scanners/scanner25.py index e068d6ee..0135c1b8 100755 --- a/uncompyle6/scanners/scanner25.py +++ b/uncompyle6/scanners/scanner25.py @@ -16,11 +16,11 @@ from array import array import dis from uncompyle6.opcodes.opcode_25 import * -import uncompyle6.scanner as scan +import uncompyle6.scanners.scanner2 as scan -class Scanner25(scan.Scanner): +class Scanner25(scan.Scanner2): def __init__(self): - scan.Scanner.__init__(self, 2.5) + super(Scanner25, self).__init__(2.5) def disassemble(self, co, classname=None, code_objects={}): ''' diff --git a/uncompyle6/scanners/scanner26.py b/uncompyle6/scanners/scanner26.py index 6ea0d73a..c2fd3262 100755 --- a/uncompyle6/scanners/scanner26.py +++ b/uncompyle6/scanners/scanner26.py @@ -16,11 +16,11 @@ from array import array from uncompyle6.opcodes.opcode_26 import * import dis -import uncompyle6.scanner as scan +import uncompyle6.scanners.scanner2 as scan -class Scanner26(scan.Scanner): +class Scanner26(scan.Scanner2): def __init__(self): - scan.Scanner.__init__(self, 2.6) + super(Scanner26, self).__init__(2.6) def disassemble(self, co, classname=None, code_objects={}): ''' diff --git a/uncompyle6/scanners/scanner27.py b/uncompyle6/scanners/scanner27.py index 99973dc5..f360359b 100755 --- a/uncompyle6/scanners/scanner27.py +++ b/uncompyle6/scanners/scanner27.py @@ -1,7 +1,4 @@ # Copyright (c) 2015, 2016 by Rocky Bernstein -# Copyright (c) 2005 by Dan Pascu -# Copyright (c) 2000-2002 by hartmut Goebel -# Copyright (c) 1999 John Aycock """ Python 2.7 bytecode scanner/deparser @@ -13,722 +10,27 @@ for later use in deparsing. from __future__ import print_function -import dis, inspect -from collections import namedtuple -from array import array +from uncompyle6.scanners.scanner2 import Scanner2 -from xdis.code import iscode -from uncompyle6.opcodes.opcode_27 import * # NOQA -import uncompyle6.scanner as scan +# bytecode verification, verify(), uses JUMP_OPs from here +from xdis.opcodes import opcode_27 +JUMP_OPs = opcode_27.JUMP_OPs -class Scanner27(scan.Scanner): +class Scanner27(Scanner2): def __init__(self): - scan.Scanner.__init__(self, 2.7) - - def disassemble(self, co, classname=None, code_objects={}): - """ - Disassemble a Python 3 ode object, returning a list of 'Token'. - Various tranformations are made to assist the deparsing grammar. - For example: - - various types of LOAD_CONST's are categorized in terms of what they load - - COME_FROM instructions are added to assist parsing control structures - - MAKE_FUNCTION and FUNCTION_CALLS append the number of positional aruments - The main part of this procedure is modelled after - dis.disassemble(). - """ - - # import dis; dis.disassemble(co) # DEBUG - - # Container for tokens - tokens = [] - - customize = {} - Token = self.Token # shortcut - - n = self.setup_code(co) - self.build_lines_data(co, n) - self.build_prev_op(n) - - # self.lines contains (block,addrLastInstr) - if classname: - classname = '_' + classname.lstrip('_') + '__' - - def unmangle(name): - if name.startswith(classname) and name[-2:] != '__': - return name[len(classname) - 2:] - return name - - free = [ unmangle(name) for name in (co.co_cellvars + co.co_freevars) ] - names = [ unmangle(name) for name in co.co_names ] - varnames = [ unmangle(name) for name in co.co_varnames ] - else: - free = co.co_cellvars + co.co_freevars - names = co.co_names - varnames = co.co_varnames - - self.load_asserts = set() - for i in self.op_range(0, n): - if self.code[i] == PJIT and self.code[i+3] == LOAD_GLOBAL: - if names[self.get_argument(i+3)] == 'AssertionError': - self.load_asserts.add(i+3) - - cf = self.find_jump_targets() - # contains (code, [addrRefToCode]) - last_stmt = self.next_stmt[0] - i = self.next_stmt[last_stmt] - replace = {} - while i < n-1: - if self.lines[last_stmt].next > i: - if self.code[last_stmt] == PRINT_ITEM: - if self.code[i] == PRINT_ITEM: - replace[i] = 'PRINT_ITEM_CONT' - elif self.code[i] == PRINT_NEWLINE: - replace[i] = 'PRINT_NEWLINE_CONT' - last_stmt = i - i = self.next_stmt[i] - - imports = self.all_instr(0, n, (IMPORT_NAME, IMPORT_FROM, IMPORT_STAR)) - if len(imports) > 1: - last_import = imports[0] - for i in imports[1:]: - if self.lines[last_import].next > i: - if self.code[last_import] == IMPORT_NAME == self.code[i]: - replace[i] = 'IMPORT_NAME_CONT' - last_import = i - - extended_arg = 0 - for offset in self.op_range(0, n): - if offset in cf: - k = 0 - for j in cf[offset]: - tokens.append(Token('COME_FROM', None, repr(j), - offset="%s_%d" % (offset, k))) - k += 1 - - op = self.code[offset] - op_name = opname[op] - - oparg = None; pattr = None - if op >= HAVE_ARGUMENT: - oparg = self.get_argument(offset) + extended_arg - extended_arg = 0 - if op == EXTENDED_ARG: - extended_arg = oparg * scan.L65536 - continue - if op in hasconst: - const = co.co_consts[oparg] - if iscode(const): - oparg = const - if const.co_name == '': - assert op_name == 'LOAD_CONST' - op_name = 'LOAD_LAMBDA' - elif const.co_name == '': - op_name = 'LOAD_GENEXPR' - elif const.co_name == '': - op_name = 'LOAD_DICTCOMP' - elif const.co_name == '': - op_name = 'LOAD_SETCOMP' - # verify() uses 'pattr' for comparison, since 'attr' - # now holds Code(const) and thus can not be used - # for comparison (todo: think about changing this) - # pattr = 'code_object @ 0x%x %s->%s' %\ - # (id(const), const.co_filename, const.co_name) - pattr = '' - else: - pattr = const - elif op in hasname: - pattr = names[oparg] - elif op in hasjrel: - pattr = repr(offset + 3 + oparg) - elif op in hasjabs: - pattr = repr(oparg) - elif op in haslocal: - pattr = varnames[oparg] - elif op in hascompare: - pattr = cmp_op[oparg] - elif op in hasfree: - pattr = free[oparg] - - if op in (BUILD_LIST, BUILD_TUPLE, BUILD_SET, BUILD_SLICE, - UNPACK_SEQUENCE, - MAKE_FUNCTION, CALL_FUNCTION, MAKE_CLOSURE, - CALL_FUNCTION_VAR, CALL_FUNCTION_KW, - CALL_FUNCTION_VAR_KW, DUP_TOPX, RAISE_VARARGS - ): - # CE - Hack for >= 2.5 - # Now all values loaded via LOAD_CLOSURE are packed into - # a tuple before calling MAKE_CLOSURE. - if op == BUILD_TUPLE and \ - self.code[self.prev[offset]] == LOAD_CLOSURE: - continue - else: - op_name = '%s_%d' % (op_name, oparg) - if op != BUILD_SLICE: - customize[op_name] = oparg - elif op == JA: - target = self.get_target(offset) - if target < offset: - if offset in self.stmts and self.code[offset+3] not in (END_FINALLY, POP_BLOCK) \ - and offset not in self.not_continue: - op_name = 'CONTINUE' - else: - op_name = 'JUMP_BACK' - - elif op == LOAD_GLOBAL: - if offset in self.load_asserts: - op_name = 'LOAD_ASSERT' - elif op == RETURN_VALUE: - if offset in self.return_end_ifs: - op_name = 'RETURN_END_IF' - - if offset in self.linestartoffsets: - linestart = self.linestartoffsets[offset] - else: - linestart = None - - if offset not in replace: - tokens.append(Token(op_name, oparg, pattr, offset, linestart)) - else: - tokens.append(Token(replace[offset], oparg, pattr, offset, linestart)) - return tokens, customize - - def disassemble_native(self, co, classname=None, code_objects={}): - """ - Like disassemble3 but doesn't try to adjust any opcodes. - """ - - # Container for tokens - tokens = [] - - customize = {} - Token = self.Token # shortcut - - n = self.setup_code(co) - self.build_lines_data(co, n) - - # self.lines contains (block,addrLastInstr) - if classname: - classname = '_' + classname.lstrip('_') + '__' - - def unmangle(name): - if name.startswith(classname) and name[-2:] != '__': - return name[len(classname) - 2:] - return name - - free = [ unmangle(name) for name in (co.co_cellvars + co.co_freevars) ] - names = [ unmangle(name) for name in co.co_names ] - varnames = [ unmangle(name) for name in co.co_varnames ] - else: - free = co.co_cellvars + co.co_freevars - names = co.co_names - varnames = co.co_varnames - - extended_arg = 0 - for offset in self.op_range(0, n): - op = self.code[offset] - op_name = opname[op] - - oparg = None; pattr = None - if op >= HAVE_ARGUMENT: - oparg = self.get_argument(offset) + extended_arg - extended_arg = 0 - if op == EXTENDED_ARG: - extended_arg = oparg * scan.L65536 - continue - if op in hasconst: - pattr = co.co_consts[oparg] - elif op in hasname: - pattr = names[oparg] - elif op in hasjrel: - pattr = repr(offset + 3 + oparg) - elif op in hasjabs: - pattr = repr(oparg) - elif op in haslocal: - pattr = varnames[oparg] - elif op in hascompare: - pattr = cmp_op[oparg] - elif op in hasfree: - pattr = free[oparg] - - if offset in self.linestartoffsets: - linestart = self.linestartoffsets[offset] - else: - linestart = None - - tokens.append(Token(op_name, oparg, pattr, offset, linestart)) - pass - return tokens, customize - - def setup_code(self, co): - """ - Creates Python-independent bytecode structure (byte array) in - self.code and records previous instruction in self.prev - The size of self.code is returned - """ - self.code = array('B', co.co_code) - - n = -1 - for i in self.op_range(0, len(self.code)): - if self.code[i] in (RETURN_VALUE, END_FINALLY): - n = i + 1 - pass - pass - assert n > -1, "Didn't find RETURN_VALUE or END_FINALLY FINALLY" - self.code = array('B', co.co_code[:n]) - - return n - - def build_prev_op(self, n): - self.prev = [0] - # mapping addresses of instruction & argument - for i in self.op_range(0, n): - op = self.code[i] - self.prev.append(i) - if op >= HAVE_ARGUMENT: - self.prev.append(i) - self.prev.append(i) - pass - pass - - def build_lines_data(self, co, n): - """ - Initializes self.lines and self.linesstartoffsets - """ - self.lines = [] - linetuple = namedtuple('linetuple', ['l_no', 'next']) - - # linestarts is a tuple of (offset, line number). - # Turn that in a has that we can index - linestarts = list(dis.findlinestarts(co)) - self.linestartoffsets = {} - for offset, lineno in linestarts: - self.linestartoffsets[offset] = lineno - - j = 0 - (prev_start_byte, prev_line_no) = linestarts[0] - for (start_byte, line_no) in linestarts[1:]: - while j < start_byte: - self.lines.append(linetuple(prev_line_no, start_byte)) - j += 1 - prev_line_no = start_byte - while j < n: - self.lines.append(linetuple(prev_line_no, n)) - j+=1 + super(Scanner27, self).__init__(2.7) return - - def build_stmt_indices(self): - code = self.code - start = 0 - end = len(code) - - stmt_opcodes = set([ - SETUP_LOOP, BREAK_LOOP, CONTINUE_LOOP, - SETUP_FINALLY, END_FINALLY, SETUP_EXCEPT, SETUP_WITH, - POP_BLOCK, STORE_FAST, DELETE_FAST, STORE_DEREF, - STORE_GLOBAL, DELETE_GLOBAL, STORE_NAME, DELETE_NAME, - STORE_ATTR, DELETE_ATTR, STORE_SUBSCR, DELETE_SUBSCR, - RETURN_VALUE, RAISE_VARARGS, POP_TOP, - PRINT_EXPR, PRINT_ITEM, PRINT_NEWLINE, PRINT_ITEM_TO, PRINT_NEWLINE_TO, - STORE_SLICE_0, STORE_SLICE_1, STORE_SLICE_2, STORE_SLICE_3, - DELETE_SLICE_0, DELETE_SLICE_1, DELETE_SLICE_2, DELETE_SLICE_3, - JUMP_ABSOLUTE, EXEC_STMT, - ]) - - stmt_opcode_seqs = [(PJIF, JF), (PJIF, JA), (PJIT, JF), (PJIT, JA)] - - designator_ops = set([ - STORE_FAST, STORE_NAME, STORE_GLOBAL, STORE_DEREF, STORE_ATTR, - STORE_SLICE_0, STORE_SLICE_1, STORE_SLICE_2, STORE_SLICE_3, - STORE_SUBSCR, UNPACK_SEQUENCE, JA - ]) - - prelim = self.all_instr(start, end, stmt_opcodes) - - stmts = self.stmts = set(prelim) - pass_stmts = set() - for seq in stmt_opcode_seqs: - for i in self.op_range(start, end-(len(seq)+1)): - match = True - for elem in seq: - if elem != code[i]: - match = False - break - i += self.op_size(code[i]) - - if match: - i = self.prev[i] - stmts.add(i) - pass_stmts.add(i) - - if pass_stmts: - stmt_list = list(stmts) - stmt_list.sort() - else: - stmt_list = prelim - last_stmt = -1 - self.next_stmt = [] - slist = self.next_stmt = [] - i = 0 - for s in stmt_list: - if code[s] == JA and s not in pass_stmts: - target = self.get_target(s) - if target > s or self.lines[last_stmt].l_no == self.lines[s].l_no: - stmts.remove(s) - continue - j = self.prev[s] - while code[j] == JA: - j = self.prev[j] - if code[j] == LIST_APPEND: # list comprehension - stmts.remove(s) - continue - elif code[s] == POP_TOP and code[self.prev[s]] == ROT_TWO: - stmts.remove(s) - continue - elif code[s] in designator_ops: - j = self.prev[s] - while code[j] in designator_ops: - j = self.prev[j] - if code[j] == FOR_ITER: - stmts.remove(s) - continue - last_stmt = s - slist += [s] * (s-i) - i = s - slist += [end] * (end-len(slist)) - - def next_except_jump(self, start): - ''' - Return the next jump that was generated by an except SomeException: - construct in a try...except...else clause or None if not found. - ''' - - if self.code[start] == DUP_TOP: - except_match = self.first_instr(start, len(self.code), POP_JUMP_IF_FALSE) - if except_match: - jmp = self.prev[self.get_target(except_match)] - self.ignore_if.add(except_match) - self.not_continue.add(jmp) - return jmp - - count_END_FINALLY = 0 - count_SETUP_ = 0 - for i in self.op_range(start, len(self.code)): - op = self.code[i] - if op == END_FINALLY: - if count_END_FINALLY == count_SETUP_: - assert self.code[self.prev[i]] in (JA, JF, RETURN_VALUE) - self.not_continue.add(self.prev[i]) - return self.prev[i] - count_END_FINALLY += 1 - elif op in (SETUP_EXCEPT, SETUP_WITH, SETUP_FINALLY): - count_SETUP_ += 1 - - def detect_structure(self, pos, op=None): - ''' - Detect type of block structures and their boundaries to fix optimized jumps - in python2.3+ - ''' - - # TODO: check the struct boundaries more precisely -Dan - - code = self.code - # Ev remove this test and make op a mandatory argument -Dan - if op is None: - op = code[pos] - - # Detect parent structure - parent = self.structs[0] - start = parent['start'] - end = parent['end'] - for s in self.structs: - _start = s['start'] - _end = s['end'] - if (_start <= pos < _end) and (_start >= start and _end <= end): - start = _start - end = _end - parent = s - - if op == SETUP_LOOP: - start = pos+3 - target = self.get_target(pos, op) - end = self.restrict_to_parent(target, parent) - - if target != end: - self.fixed_jumps[pos] = end - (line_no, next_line_byte) = self.lines[pos] - jump_back = self.last_instr(start, end, JA, - next_line_byte, False) - - if jump_back and jump_back != self.prev[end] and code[jump_back+3] in (JA, JF): - if code[self.prev[end]] == RETURN_VALUE or \ - (code[self.prev[end]] == POP_BLOCK and code[self.prev[self.prev[end]]] == RETURN_VALUE): - jump_back = None - if not jump_back: # loop suite ends in return. wtf right? - jump_back = self.last_instr(start, end, RETURN_VALUE) + 1 - if not jump_back: - return - if code[self.prev[next_line_byte]] not in (PJIF, PJIT): - loop_type = 'for' - else: - loop_type = 'while' - self.ignore_if.add(self.prev[next_line_byte]) - target = next_line_byte - end = jump_back + 3 - else: - if self.get_target(jump_back) >= next_line_byte: - jump_back = self.last_instr(start, end, JA, start, False) - if end > jump_back+4 and code[end] in (JF, JA): - if code[jump_back+4] in (JA, JF): - if self.get_target(jump_back+4) == self.get_target(end): - self.fixed_jumps[pos] = jump_back+4 - end = jump_back+4 - elif target < pos: - self.fixed_jumps[pos] = jump_back+4 - end = jump_back+4 - target = self.get_target(jump_back, JA) - - if code[target] in (FOR_ITER, GET_ITER): - loop_type = 'for' - else: - loop_type = 'while' - test = self.prev[next_line_byte] - if test == pos: - loop_type = 'while 1' - elif self.code[test] in hasjabs+hasjrel: - self.ignore_if.add(test) - test_target = self.get_target(test) - if test_target > (jump_back+3): - jump_back = test_target - self.not_continue.add(jump_back) - self.loops.append(target) - self.structs.append({'type': loop_type + '-loop', - 'start': target, - 'end': jump_back}) - if jump_back+3 != end: - self.structs.append({'type': loop_type + '-else', - 'start': jump_back+3, - 'end': end}) - elif op == SETUP_EXCEPT: - start = pos+3 - target = self.get_target(pos, op) - end = self.restrict_to_parent(target, parent) - if target != end: - self.fixed_jumps[pos] = end - # print target, end, parent - # Add the try block - self.structs.append({'type': 'try', - 'start': start, - 'end': end-4}) - # Now isolate the except and else blocks - end_else = start_else = self.get_target(self.prev[end]) - - # Add the except blocks - i = end - while self.code[i] != END_FINALLY: - jmp = self.next_except_jump(i) - if self.code[jmp] == RETURN_VALUE: - self.structs.append({'type': 'except', - 'start': i, - 'end': jmp+1}) - i = jmp + 1 - else: - if self.get_target(jmp) != start_else: - end_else = self.get_target(jmp) - if self.code[jmp] == JF: - self.fixed_jumps[jmp] = -1 - self.structs.append({'type': 'except', - 'start': i, - 'end': jmp}) - i = jmp + 3 - - # Add the try-else block - if end_else != start_else: - r_end_else = self.restrict_to_parent(end_else, parent) - self.structs.append({'type': 'try-else', - 'start': i+1, - 'end': r_end_else}) - self.fixed_jumps[i] = r_end_else - else: - self.fixed_jumps[i] = i+1 - - elif op in (PJIF, PJIT): - start = pos+3 - target = self.get_target(pos, op) - rtarget = self.restrict_to_parent(target, parent) - pre = self.prev - - if target != rtarget and parent['type'] == 'and/or': - self.fixed_jumps[pos] = rtarget - return - # does this jump to right after another cond jump? - # if so, it's part of a larger conditional - if (code[pre[target]] in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP, - PJIF, PJIT)) and (target > pos): - self.fixed_jumps[pos] = pre[target] - self.structs.append({'type': 'and/or', - 'start': start, - 'end': pre[target]}) - return - - # is this an if and - if op == PJIF: - match = self.rem_or(start, self.next_stmt[pos], PJIF, target) - match = self.remove_mid_line_ifs(match) - - if match: - if code[pre[rtarget]] in (JF, JA) \ - and pre[rtarget] not in self.stmts \ - and self.restrict_to_parent(self.get_target(pre[rtarget]), parent) == rtarget: - if code[pre[pre[rtarget]]] == JA \ - and self.remove_mid_line_ifs([pos]) \ - and target == self.get_target(pre[pre[rtarget]]) \ - and (pre[pre[rtarget]] not in self.stmts or self.get_target(pre[pre[rtarget]]) > pre[pre[rtarget]])\ - and 1 == len(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], (PJIF, PJIT), target))): - pass - elif code[pre[pre[rtarget]]] == RETURN_VALUE \ - and self.remove_mid_line_ifs([pos]) \ - and 1 == (len(set(self.remove_mid_line_ifs(self.rem_or(start, - pre[pre[rtarget]], - (PJIF, PJIT), target))) - | set(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], - (PJIF, PJIT, JA), pre[rtarget], True))))): - pass - else: - fix = None - jump_ifs = self.all_instr(start, self.next_stmt[pos], PJIF) - last_jump_good = True - for j in jump_ifs: - if target == self.get_target(j): - if self.lines[j].next == j+3 and last_jump_good: - fix = j - break - else: - last_jump_good = False - self.fixed_jumps[pos] = fix or match[-1] - return - else: - self.fixed_jumps[pos] = match[-1] - return - else: # op == PJIT - if (pos+3) in self.load_asserts: - if code[pre[rtarget]] == RAISE_VARARGS: - return - self.load_asserts.remove(pos+3) - - next = self.next_stmt[pos] - if pre[next] == pos: - pass - elif code[next] in (JF, JA) and target == self.get_target(next): - if code[pre[next]] == PJIF: - if code[next] == JF or target != rtarget or code[pre[pre[rtarget]]] not in (JA, RETURN_VALUE): - self.fixed_jumps[pos] = pre[next] - return - elif code[next] == JA and code[target] in (JA, JF): - next_target = self.get_target(next) - if self.get_target(target) == next_target: - self.fixed_jumps[pos] = pre[next] - return - elif code[next_target] in (JA, JF) and self.get_target(next_target) == self.get_target(target): - self.fixed_jumps[pos] = pre[next] - return - - # don't add a struct for a while test, it's already taken care of - if pos in self.ignore_if: - return - - if code[pre[rtarget]] == JA and pre[rtarget] in self.stmts \ - and pre[rtarget] != pos and pre[pre[rtarget]] != pos: - if code[rtarget] == JA and code[rtarget+3] == POP_BLOCK: - if code[pre[pre[rtarget]]] != JA: - pass - elif self.get_target(pre[pre[rtarget]]) != target: - pass - else: - rtarget = pre[rtarget] - else: - rtarget = pre[rtarget] - # does the if jump just beyond a jump op, then this is probably an if statement - if code[pre[rtarget]] in (JA, JF): - if_end = self.get_target(pre[rtarget]) - - # is this a loop not an if? - if (if_end < pre[rtarget]) and (code[pre[if_end]] == SETUP_LOOP): - if(if_end > start): - return - - end = self.restrict_to_parent(if_end, parent) - - self.structs.append({'type': 'if-then', - 'start': start, - 'end': pre[rtarget]}) - self.not_continue.add(pre[rtarget]) - - if rtarget < end: - self.structs.append({'type': 'if-else', - 'start': rtarget, - 'end': end}) - elif code[pre[rtarget]] == RETURN_VALUE: - self.structs.append({'type': 'if-then', - 'start': start, - 'end': rtarget}) - self.return_end_ifs.add(pre[rtarget]) - - elif op in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP): - target = self.get_target(pos, op) - self.fixed_jumps[pos] = self.restrict_to_parent(target, parent) - - def find_jump_targets(self): - ''' - Detect all offsets in a byte code which are jump targets. - - Return the list of offsets. - - This procedure is modelled after dis.findlabels(), but here - for each target the number of jumps are counted. - ''' - - n = len(self.code) - self.structs = [{'type': 'root', - 'start': 0, - 'end': n-1}] - self.loops = [] # All loop entry points - self.fixed_jumps = {} # Map fixed jumps to their real destination - self.ignore_if = set() - self.build_stmt_indices() - - # Containers filled by detect_structure() - self.not_continue = set() - self.return_end_ifs = set() - - targets = {} - for i in self.op_range(0, n): - op = self.code[i] - - # Determine structures and fix jumps in Python versions - # since 2.3 - self.detect_structure(i, op) - - if op >= HAVE_ARGUMENT: - label = self.fixed_jumps.get(i) - oparg = self.code[i+1] + self.code[i+2] * 256 - if label is None: - if op in hasjrel and op != FOR_ITER: - label = i + 3 + oparg - elif op in hasjabs: - if op in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP): - if (oparg > i): - label = oparg - - if label is not None and label != -1: - targets[label] = targets.get(label, []) + [i] - elif op == END_FINALLY and i in self.fixed_jumps: - label = self.fixed_jumps[i] - targets[label] = targets.get(label, []) + [i] - return targets + pass if __name__ == "__main__": - co = inspect.currentframe().f_code - tokens, customize = Scanner27().disassemble(co) - for t in tokens: - print(t) - pass + from uncompyle6 import PYTHON_VERSION + if PYTHON_VERSION == 2.7: + import inspect + co = inspect.currentframe().f_code + tokens, customize = Scanner27().disassemble(co) + for t in tokens: + print(t.format()) + pass + else: + print("Need to be Python 2.7 to demo; I am %s." % + PYTHON_VERSION) diff --git a/uncompyle6/scanners/scanner3.py b/uncompyle6/scanners/scanner3.py index 532d78f4..4254df0b 100644 --- a/uncompyle6/scanners/scanner3.py +++ b/uncompyle6/scanners/scanner3.py @@ -1,7 +1,6 @@ # Copyright (c) 2015, 2016 by Rocky Bernstein # Copyright (c) 2005 by Dan Pascu # Copyright (c) 2000-2002 by hartmut Goebel -# Copyright (c) 1999 John Aycock """ Python 3 Generic bytecode scanner/deparser @@ -29,8 +28,6 @@ from array import array from xdis.code import iscode from xdis.bytecode import Bytecode, findlinestarts from uncompyle6.scanner import Token -from uncompyle6 import PYTHON3 - # Get all the opcodes into globals import xdis.opcodes.opcode_33 as op3 @@ -205,213 +202,6 @@ class Scanner3(scan.Scanner): pass return tokens, {} - def disassemble_generic(self, co, classname=None, code_objects={}): - """ - Convert code object into a sequence of tokens. - - The below is based on (an older version?) of Python dis.disassemble_bytes(). - """ - - # dis.disassemble(co) # DEBUG - # Container for tokens - tokens = [] - customize = {} - self.code = code = array('B', co.co_code) - codelen = len(code) - self.build_lines_data(co) - self.build_prev_op() - self.code_objects = code_objects - - # self.lines contains (block,addrLastInstr) - if classname: - classname = '_' + classname.lstrip('_') + '__' - - def unmangle(name): - if name.startswith(classname) and name[-2:] != '__': - return name[len(classname) - 2:] - return name - - free = [ unmangle(name) for name in (co.co_cellvars + co.co_freevars) ] - names = [ unmangle(name) for name in co.co_names ] - varnames = [ unmangle(name) for name in co.co_varnames ] - else: - free = co.co_cellvars + co.co_freevars - names = co.co_names - varnames = co.co_varnames - pass - - # Scan for assertions. Later we will - # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT' for those - # assertions - self.load_asserts = set() - for i in self.op_range(0, codelen): - if (self.code[i] == POP_JUMP_IF_TRUE and - self.code[i+3] == LOAD_GLOBAL): - if names[self.get_argument(i+3)] == 'AssertionError': - self.load_asserts.add(i+3) - - # Get jump targets - # Format: {target offset: [jump offsets]} - jump_targets = self.find_jump_targets() - - # contains (code, [addrRefToCode]) - last_stmt = self.next_stmt[0] - i = self.next_stmt[last_stmt] - replace = {} - - imports = self.all_instr(0, codelen, (IMPORT_NAME, IMPORT_FROM, IMPORT_STAR)) - if len(imports) > 1: - last_import = imports[0] - for i in imports[1:]: - if self.lines[last_import].next > i: - if self.code[last_import] == IMPORT_NAME == self.code[i]: - replace[i] = 'IMPORT_NAME_CONT' - last_import = i - - # Initialize extended arg at 0. When extended arg op is encountered, - # variable preserved for next cycle and added as arg for next op - extended_arg = 0 - - for offset in self.op_range(0, codelen): - - # Add jump target tokens - if offset in jump_targets: - jump_idx = 0 - for jump_offset in jump_targets[offset]: - tokens.append(Token('COME_FROM', None, repr(jump_offset), - offset='%s_%s' % (offset, jump_idx))) - jump_idx += 1 - pass - pass - - op = code[offset] - op_name = self.opname[op] - - oparg = None; pattr = None - - if op >= op3.HAVE_ARGUMENT: - oparg = self.get_argument(offset) + extended_arg - extended_arg = 0 - if op == op3.EXTENDED_ARG: - extended_arg = oparg * scan.L65536 - continue - if op in op3.hasconst: - const = co.co_consts[oparg] - if not PYTHON3 and isinstance(const, str): - if const in code_objects: - const = code_objects[const] - if iscode(const): - oparg = const - if const.co_name == '': - assert op_name == 'LOAD_CONST' - op_name = 'LOAD_LAMBDA' - elif const.co_name == '': - op_name = 'LOAD_GENEXPR' - elif const.co_name == '': - op_name = 'LOAD_DICTCOMP' - elif const.co_name == '': - op_name = 'LOAD_SETCOMP' - elif const.co_name == '': - op_name = 'LOAD_LISTCOMP' - # verify() uses 'pattr' for comparison, since 'attr' - # now holds Code(const) and thus can not be used - # for comparison (todo: think about changing this) - # pattr = 'code_object @ 0x%x %s->%s' %\ - # (id(const), const.co_filename, const.co_name) - pattr = '' - else: - pattr = const - elif op in op3.hasname: - pattr = names[oparg] - elif op in op3.hasjrel: - pattr = repr(offset + 3 + oparg) - elif op in op3.hasjabs: - pattr = repr(oparg) - elif op in op3.haslocal: - pattr = varnames[oparg] - elif op in op3.hascompare: - pattr = op3.cmp_op[oparg] - elif op in op3.hasfree: - pattr = free[oparg] - - if op_name == 'MAKE_FUNCTION': - argc = oparg - attr = ((argc & 0xFF), (argc >> 8) & 0xFF, (argc >> 16) & 0x7FFF) - pos_args, name_pair_args, annotate_args = attr - if name_pair_args > 0: - op_name = 'MAKE_FUNCTION_N%d' % name_pair_args - pass - if annotate_args > 0: - op_name = '%s_A_%d' % [op_name, annotate_args] - pass - op_name = '%s_%d' % (op_name, pos_args) - pattr = ("%d positional, %d keyword pair, %d annotated" % - (pos_args, name_pair_args, annotate_args)) - tokens.append( - Token( - type_ = op_name, - attr = (pos_args, name_pair_args, annotate_args), - pattr = pattr, - offset = offset, - linestart = linestart) - ) - continue - elif op_name in ('BUILD_LIST', 'BUILD_TUPLE', 'BUILD_SET', 'BUILD_SLICE', - 'UNPACK_SEQUENCE', - 'MAKE_FUNCTION', 'CALL_FUNCTION', 'MAKE_CLOSURE', - 'CALL_FUNCTION_VAR', 'CALL_FUNCTION_KW', - 'CALL_FUNCTION_VAR_KW', 'RAISE_VARARGS' - ): - # CALL_FUNCTION OP renaming is done as a custom rule in parse3 - if op_name not in ('CALL_FUNCTION', 'CALL_FUNCTION_VAR', - 'CALL_FUNCTION_VAR_KW', 'CALL_FUNCTION_KW', - ): - op_name = '%s_%d' % (op_name, oparg) - if op_name != 'BUILD_SLICE': - customize[op_name] = oparg - elif op_name == 'JUMP_ABSOLUTE': - target = self.get_target(offset) - if target < offset: - if (offset in self.stmts - and self.code[offset+3] not in (END_FINALLY, POP_BLOCK) - and offset not in self.not_continue): - op_name = 'CONTINUE' - else: - op_name = 'JUMP_BACK' - pass - pass - pass - elif op_name == 'JUMP_FORWARD': - # Python 3.5 will optimize out a JUMP_FORWARD to the - # next instruction while Python 3.2 won't. Smplify - # grammar rules working with both 3.2 and 3.5, - # by optimizing the way Python 3.5 does it. - # - # We may however want to consider whether we do - # this in 3.5 or not. - if oparg == 0 and self.version >= 3.5: - tokens.append(Token('NOP', oparg, pattr, offset, linestart)) - continue - elif op_name == 'LOAD_GLOBAL': - if offset in self.load_asserts: - op_name = 'LOAD_ASSERT' - - if offset in self.linestarts: - linestart = self.linestarts[offset] - else: - linestart = None - - if offset not in replace: - tokens.append(Token(op_name, oparg, pattr, offset, linestart)) - else: - tokens.append(Token(replace[offset], oparg, pattr, offset, linestart)) - pass - - # debug: - # for t in tokens: - # print(t) - return tokens, customize - def build_lines_data(self, code_obj): """ Generate various line-related helper data. @@ -905,7 +695,7 @@ if __name__ == "__main__": import inspect co = inspect.currentframe().f_code from uncompyle6 import PYTHON_VERSION - tokens, customize = Scanner3(PYTHON_VERSION).disassemble3(co) + tokens, customize = Scanner3(PYTHON_VERSION).disassemble(co) for t in tokens: print(t.format()) else: