Start to DRY Python 2 scanners...

Get 2.7 opcodes from xdis.
This commit is contained in:
rocky
2016-05-28 19:34:12 -04:00
parent 90741148ad
commit e70e7bfc16
8 changed files with 801 additions and 1165 deletions

View File

@@ -37,7 +37,7 @@ entry_points={
]} ]}
ftp_url = None ftp_url = None
install_requires = ['spark-parser >= 1.2.1', install_requires = ['spark-parser >= 1.2.1',
'xdis >= 1.0.2'] 'xdis >= 1.0.4']
license = 'MIT' license = 'MIT'
mailing_list = 'python-debugger@googlegroups.com' mailing_list = 'python-debugger@googlegroups.com'
modname = 'uncompyle6' modname = 'uncompyle6'

View File

@@ -1,219 +0,0 @@
"""
CPython 2.7 bytecode opcodes
This is used in scanner (bytecode disassembly) and parser (Python grammar).
This is a superset of Python 3.4's opcode.py with some opcodes that simplify
parsing and semantic interpretation.
"""
# FIXME: DRY this along the lines of opcode_3x.
cmp_op = ('<', '<=', '==', '!=', '>', '>=', 'in', 'not in', 'is',
'is not', 'exception match', 'BAD')
hasconst = []
hasname = []
hasjrel = []
hasjabs = []
haslocal = []
hascompare = []
hasfree = []
hasArgumentExtended = []
PJIF = PJIT = JA = JF = 0
opmap = {}
opname = [''] * 256
for op in range(256): opname[op] = '<%r>' % (op,)
del op
def def_op(name, op):
opname[op] = name
opmap[name] = op
globals().update({name: op})
def name_op(name, op):
def_op(name, op)
hasname.append(op)
def jrel_op(name, op):
def_op(name, op)
hasjrel.append(op)
def jabs_op(name, op):
def_op(name, op)
hasjabs.append(op)
def updateGlobal():
globals().update({'PJIF': opmap['POP_JUMP_IF_FALSE']})
globals().update({'PJIT': opmap['POP_JUMP_IF_TRUE']})
globals().update({'JA': opmap['JUMP_ABSOLUTE']})
globals().update({'JF': opmap['JUMP_FORWARD']})
globals().update(dict([(k.replace('+', '_'), v) for (k, v) in opmap.items()]))
globals().update({'JUMP_OPs': map(lambda op: opname[op], hasjrel + hasjabs)})
# Instruction opcodes for compiled code
# Blank lines correspond to available opcodes
def_op('STOP_CODE', 0)
def_op('POP_TOP', 1)
def_op('ROT_TWO', 2)
def_op('ROT_THREE', 3)
def_op('DUP_TOP', 4)
def_op('ROT_FOUR', 5)
def_op('NOP', 9)
def_op('UNARY_POSITIVE', 10)
def_op('UNARY_NEGATIVE', 11)
def_op('UNARY_NOT', 12)
def_op('UNARY_CONVERT', 13)
def_op('UNARY_INVERT', 15)
def_op('BINARY_POWER', 19)
def_op('BINARY_MULTIPLY', 20)
def_op('BINARY_DIVIDE', 21)
def_op('BINARY_MODULO', 22)
def_op('BINARY_ADD', 23)
def_op('BINARY_SUBTRACT', 24)
def_op('BINARY_SUBSCR', 25)
def_op('BINARY_FLOOR_DIVIDE', 26)
def_op('BINARY_TRUE_DIVIDE', 27)
def_op('INPLACE_FLOOR_DIVIDE', 28)
def_op('INPLACE_TRUE_DIVIDE', 29)
def_op('SLICE+0', 30)
def_op('SLICE+1', 31)
def_op('SLICE+2', 32)
def_op('SLICE+3', 33)
def_op('STORE_SLICE+0', 40)
def_op('STORE_SLICE+1', 41)
def_op('STORE_SLICE+2', 42)
def_op('STORE_SLICE+3', 43)
def_op('DELETE_SLICE+0', 50)
def_op('DELETE_SLICE+1', 51)
def_op('DELETE_SLICE+2', 52)
def_op('DELETE_SLICE+3', 53)
def_op('STORE_MAP', 54)
def_op('INPLACE_ADD', 55)
def_op('INPLACE_SUBTRACT', 56)
def_op('INPLACE_MULTIPLY', 57)
def_op('INPLACE_DIVIDE', 58)
def_op('INPLACE_MODULO', 59)
def_op('STORE_SUBSCR', 60)
def_op('DELETE_SUBSCR', 61)
def_op('BINARY_LSHIFT', 62)
def_op('BINARY_RSHIFT', 63)
def_op('BINARY_AND', 64)
def_op('BINARY_XOR', 65)
def_op('BINARY_OR', 66)
def_op('INPLACE_POWER', 67)
def_op('GET_ITER', 68)
def_op('PRINT_EXPR', 70)
def_op('PRINT_ITEM', 71)
def_op('PRINT_NEWLINE', 72)
def_op('PRINT_ITEM_TO', 73)
def_op('PRINT_NEWLINE_TO', 74)
def_op('INPLACE_LSHIFT', 75)
def_op('INPLACE_RSHIFT', 76)
def_op('INPLACE_AND', 77)
def_op('INPLACE_XOR', 78)
def_op('INPLACE_OR', 79)
def_op('BREAK_LOOP', 80)
def_op('WITH_CLEANUP', 81)
def_op('LOAD_LOCALS', 82)
def_op('RETURN_VALUE', 83)
def_op('IMPORT_STAR', 84)
def_op('EXEC_STMT', 85)
def_op('YIELD_VALUE', 86)
def_op('POP_BLOCK', 87)
def_op('END_FINALLY', 88)
def_op('BUILD_CLASS', 89)
HAVE_ARGUMENT = 90 # Opcodes from here have an argument:
name_op('STORE_NAME', 90) # Index in name list
name_op('DELETE_NAME', 91) # ""
def_op('UNPACK_SEQUENCE', 92) # Number of tuple items
jrel_op('FOR_ITER', 93)
def_op('LIST_APPEND', 94)
name_op('STORE_ATTR', 95) # Index in name list
name_op('DELETE_ATTR', 96) # ""
name_op('STORE_GLOBAL', 97) # ""
name_op('DELETE_GLOBAL', 98) # ""
def_op('DUP_TOPX', 99) # number of items to duplicate
def_op('LOAD_CONST', 100) # Index in const list
hasconst.append(100)
name_op('LOAD_NAME', 101) # Index in name list
def_op('BUILD_TUPLE', 102) # Number of tuple items
def_op('BUILD_LIST', 103) # Number of list items
def_op('BUILD_SET', 104) # Number of set items
def_op('BUILD_MAP', 105) # Number of dict entries (upto 255)
name_op('LOAD_ATTR', 106) # Index in name list
def_op('COMPARE_OP', 107) # Comparison operator
hascompare.append(107)
name_op('IMPORT_NAME', 108) # Index in name list
name_op('IMPORT_FROM', 109) # Index in name list
jrel_op('JUMP_FORWARD', 110) # Number of bytes to skip
jabs_op('JUMP_IF_FALSE_OR_POP', 111) # Target byte offset from beginning of code
jabs_op('JUMP_IF_TRUE_OR_POP', 112) # ""
jabs_op('JUMP_ABSOLUTE', 113) # ""
jabs_op('POP_JUMP_IF_FALSE', 114) # ""
jabs_op('POP_JUMP_IF_TRUE', 115) # ""
name_op('LOAD_GLOBAL', 116) # Index in name list
jabs_op('CONTINUE_LOOP', 119) # Target address
jrel_op('SETUP_LOOP', 120) # Distance to target address
jrel_op('SETUP_EXCEPT', 121) # ""
jrel_op('SETUP_FINALLY', 122) # ""
def_op('LOAD_FAST', 124) # Local variable number
haslocal.append(124)
def_op('STORE_FAST', 125) # Local variable number
haslocal.append(125)
def_op('DELETE_FAST', 126) # Local variable number
haslocal.append(126)
def_op('RAISE_VARARGS', 130) # Number of raise arguments (1, 2, or 3)
def_op('CALL_FUNCTION', 131) # #args + (#kwargs << 8)
def_op('MAKE_FUNCTION', 132) # Number of args with default values
def_op('BUILD_SLICE', 133) # Number of items
def_op('MAKE_CLOSURE', 134)
def_op('LOAD_CLOSURE', 135)
hasfree.append(135)
def_op('LOAD_DEREF', 136)
hasfree.append(136)
def_op('STORE_DEREF', 137)
hasfree.append(137)
def_op('CALL_FUNCTION_VAR', 140) # #args + (#kwargs << 8)
def_op('CALL_FUNCTION_KW', 141) # #args + (#kwargs << 8)
def_op('CALL_FUNCTION_VAR_KW', 142) # #args + (#kwargs << 8)
jrel_op('SETUP_WITH', 143)
def_op('EXTENDED_ARG', 145)
EXTENDED_ARG = 145
def_op('SET_ADD', 146)
def_op('MAP_ADD', 147)
# PyPy magic opcodes
# FIXME: see if we can conditionally add them
def_op('LOOKUP_METHOD', 201)
def_op('CALL_METHOD', 202)
def_op('BUILD_LIST_FROM_ARG', 203)
def_op('JUMP_IF_NOT_DEBUG', 204)
updateGlobal()
del def_op, name_op, jrel_op, jabs_op
from uncompyle6 import PYTHON_VERSION
if PYTHON_VERSION == 2.7:
import dis
# print(set(dis.opmap.items()) - set(opmap.items()))
assert all(item in opmap.items() for item in dis.opmap.items())

View File

@@ -30,9 +30,10 @@ if PYTHON3:
else: else:
L65536 = long(65536) # NOQA L65536 = long(65536) # NOQA
from uncompyle6.opcodes import (opcode_25, opcode_26, opcode_27) from uncompyle6.opcodes import (opcode_25, opcode_26)
from xdis.opcodes import (opcode_32, opcode_33, opcode_34, opcode_35) from xdis.opcodes import (opcode_27,
opcode_32, opcode_33, opcode_34, opcode_35)
class Code(object): class Code(object):
@@ -212,16 +213,6 @@ class Scanner(object):
result.append(offset) result.append(offset)
return result return result
def op_size(self, op):
"""
Return size of operator with its arguments
for given opcode <op>.
"""
if op < self.opc.HAVE_ARGUMENT and op not in self.opc.hasArgumentExtended:
return 1
else:
return 3
def op_hasArgument(self, op): def op_hasArgument(self, op):
return self.op_size(op) > 1 return self.op_size(op) > 1

772
uncompyle6/scanners/scanner2.py Executable file
View File

@@ -0,0 +1,772 @@
# Copyright (c) 2015, 2016 by Rocky Bernstein
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
"""
Python 2 Generic bytecode scanner/deparser
This overlaps various Python3's dis module, but it can be run from
Python versions other than the version running this code. Notably,
run from Python version 2.
Also we *modify* the instruction sequence to assist deparsing code.
For example:
- we add "COME_FROM" instructions to help in figuring out
conditional branching and looping.
- LOAD_CONSTs are classified further into the type of thing
they load:
lambda's, genexpr's, {dict,set,list} comprehension's,
- PARAMETER counts appended {CALL,MAKE}_FUNCTION, BUILD_{TUPLE,SET,SLICE}
Finally we save token information.
"""
from __future__ import print_function
import dis, inspect
from collections import namedtuple
from array import array
from xdis.code import iscode
# FIXME: remove
from xdis.opcodes.opcode_27 import * # NOQA
import uncompyle6.scanner as scan
class Scanner2(scan.Scanner):
def __init__(self, version):
scan.Scanner.__init__(self, version)
def disassemble(self, co, classname=None, code_objects={}):
"""
Disassemble a Python 2 code object, returning a list of 'Token'.
Various tranformations are made to assist the deparsing grammar.
For example:
- various types of LOAD_CONST's are categorized in terms of what they load
- COME_FROM instructions are added to assist parsing control structures
- MAKE_FUNCTION and FUNCTION_CALLS append the number of positional aruments
The main part of this procedure is modelled after
dis.disassemble().
"""
# import dis; dis.disassemble(co) # DEBUG
# Container for tokens
tokens = []
customize = {}
Token = self.Token # shortcut
n = self.setup_code(co)
self.build_lines_data(co, n)
self.build_prev_op(n)
# self.lines contains (block,addrLastInstr)
if classname:
classname = '_' + classname.lstrip('_') + '__'
def unmangle(name):
if name.startswith(classname) and name[-2:] != '__':
return name[len(classname) - 2:]
return name
free = [ unmangle(name) for name in (co.co_cellvars + co.co_freevars) ]
names = [ unmangle(name) for name in co.co_names ]
varnames = [ unmangle(name) for name in co.co_varnames ]
else:
free = co.co_cellvars + co.co_freevars
names = co.co_names
varnames = co.co_varnames
self.load_asserts = set()
for i in self.op_range(0, n):
if self.code[i] == self.opc.PJIT and self.code[i+3] == self.opc.LOAD_GLOBAL:
if names[self.get_argument(i+3)] == 'AssertionError':
self.load_asserts.add(i+3)
cf = self.find_jump_targets()
# contains (code, [addrRefToCode])
last_stmt = self.next_stmt[0]
i = self.next_stmt[last_stmt]
replace = {}
while i < n-1:
if self.lines[last_stmt].next > i:
if self.code[last_stmt] == self.opc.PRINT_ITEM:
if self.code[i] == self.opc.PRINT_ITEM:
replace[i] = 'PRINT_ITEM_CONT'
elif self.code[i] == self.opc.PRINT_NEWLINE:
replace[i] = 'PRINT_NEWLINE_CONT'
last_stmt = i
i = self.next_stmt[i]
imports = self.all_instr(0, n, (self.opc.IMPORT_NAME, self.opc.IMPORT_FROM,
self.opc.IMPORT_STAR))
if len(imports) > 1:
last_import = imports[0]
for i in imports[1:]:
if self.lines[last_import].next > i:
if self.code[last_import] == self.opc.IMPORT_NAME == self.code[i]:
replace[i] = 'IMPORT_NAME_CONT'
last_import = i
extended_arg = 0
for offset in self.op_range(0, n):
if offset in cf:
k = 0
for j in cf[offset]:
tokens.append(Token('COME_FROM', None, repr(j),
offset="%s_%d" % (offset, k)))
k += 1
op = self.code[offset]
op_name = self.opc.opname[op]
oparg = None; pattr = None
if op >= self.opc.HAVE_ARGUMENT:
oparg = self.get_argument(offset) + extended_arg
extended_arg = 0
if op == self.opc.EXTENDED_ARG:
extended_arg = oparg * scan.L65536
continue
if op in self.opc.hasconst:
const = co.co_consts[oparg]
if iscode(const):
oparg = const
if const.co_name == '<lambda>':
assert op_name == 'LOAD_CONST'
op_name = 'LOAD_LAMBDA'
elif const.co_name == '<genexpr>':
op_name = 'LOAD_GENEXPR'
elif const.co_name == '<dictcomp>':
op_name = 'LOAD_DICTCOMP'
elif const.co_name == '<setcomp>':
op_name = 'LOAD_SETCOMP'
# verify() uses 'pattr' for comparison, since 'attr'
# now holds Code(const) and thus can not be used
# for comparison (todo: think about changing this)
# pattr = 'code_object @ 0x%x %s->%s' %\
# (id(const), const.co_filename, const.co_name)
pattr = '<code_object ' + const.co_name + '>'
else:
pattr = const
elif op in self.opc.hasname:
pattr = names[oparg]
elif op in self.opc.hasjrel:
pattr = repr(offset + 3 + oparg)
elif op in self.opc.hasjabs:
pattr = repr(oparg)
elif op in self.opc.haslocal:
pattr = varnames[oparg]
elif op in self.opc.hascompare:
pattr = self.opc.cmp_op[oparg]
elif op in self.opc.hasfree:
pattr = free[oparg]
if op in (BUILD_LIST, BUILD_TUPLE, BUILD_SET, BUILD_SLICE,
UNPACK_SEQUENCE,
MAKE_FUNCTION, CALL_FUNCTION, MAKE_CLOSURE,
CALL_FUNCTION_VAR, CALL_FUNCTION_KW,
CALL_FUNCTION_VAR_KW, DUP_TOPX, RAISE_VARARGS
):
# CE - Hack for >= 2.5
# Now all values loaded via LOAD_CLOSURE are packed into
# a tuple before calling MAKE_CLOSURE.
if op == BUILD_TUPLE and \
self.code[self.prev[offset]] == LOAD_CLOSURE:
continue
else:
op_name = '%s_%d' % (op_name, oparg)
if op != BUILD_SLICE:
customize[op_name] = oparg
elif op == JA:
target = self.get_target(offset)
if target < offset:
if offset in self.stmts and self.code[offset+3] not in (END_FINALLY, POP_BLOCK) \
and offset not in self.not_continue:
op_name = 'CONTINUE'
else:
op_name = 'JUMP_BACK'
elif op == LOAD_GLOBAL:
if offset in self.load_asserts:
op_name = 'LOAD_ASSERT'
elif op == RETURN_VALUE:
if offset in self.return_end_ifs:
op_name = 'RETURN_END_IF'
if offset in self.linestartoffsets:
linestart = self.linestartoffsets[offset]
else:
linestart = None
if offset not in replace:
tokens.append(Token(op_name, oparg, pattr, offset, linestart))
else:
tokens.append(Token(replace[offset], oparg, pattr, offset, linestart))
return tokens, customize
def disassemble_native(self, co, classname=None, code_objects={}):
"""
Like disassemble3 but doesn't try to adjust any opcodes.
"""
# Container for tokens
tokens = []
customize = {}
Token = self.Token # shortcut
n = self.setup_code(co)
self.build_lines_data(co, n)
# self.lines contains (block,addrLastInstr)
if classname:
classname = '_' + classname.lstrip('_') + '__'
def unmangle(name):
if name.startswith(classname) and name[-2:] != '__':
return name[len(classname) - 2:]
return name
free = [ unmangle(name) for name in (co.co_cellvars + co.co_freevars) ]
names = [ unmangle(name) for name in co.co_names ]
varnames = [ unmangle(name) for name in co.co_varnames ]
else:
free = co.co_cellvars + co.co_freevars
names = co.co_names
varnames = co.co_varnames
extended_arg = 0
for offset in self.op_range(0, n):
op = self.code[offset]
op_name = self.opc.opname[op]
oparg = None; pattr = None
if op >= HAVE_ARGUMENT:
oparg = self.get_argument(offset) + extended_arg
extended_arg = 0
if op == EXTENDED_ARG:
extended_arg = oparg * scan.L65536
continue
if op in hasconst:
pattr = co.co_consts[oparg]
elif op in hasname:
pattr = names[oparg]
elif op in hasjrel:
pattr = repr(offset + 3 + oparg)
elif op in hasjabs:
pattr = repr(oparg)
elif op in haslocal:
pattr = varnames[oparg]
elif op in hascompare:
pattr = cmp_op[oparg]
elif op in hasfree:
pattr = free[oparg]
if offset in self.linestartoffsets:
linestart = self.linestartoffsets[offset]
else:
linestart = None
tokens.append(Token(op_name, oparg, pattr, offset, linestart))
pass
return tokens, customize
def op_size(self, op):
"""
Return size of operator with its arguments
for given opcode <op>.
"""
if op < self.opc.HAVE_ARGUMENT and op not in self.opc.hasArgumentExtended:
return 1
else:
return 3
def setup_code(self, co):
"""
Creates Python-independent bytecode structure (byte array) in
self.code and records previous instruction in self.prev
The size of self.code is returned
"""
self.code = array('B', co.co_code)
n = -1
for i in self.op_range(0, len(self.code)):
if self.code[i] in (self.opc.RETURN_VALUE, self.opc.END_FINALLY):
n = i + 1
pass
pass
assert n > -1, "Didn't find RETURN_VALUE or END_FINALLY FINALLY"
self.code = array('B', co.co_code[:n])
return n
def build_prev_op(self, n):
self.prev = [0]
# mapping addresses of instruction & argument
for i in self.op_range(0, n):
op = self.code[i]
self.prev.append(i)
if op >= HAVE_ARGUMENT:
self.prev.append(i)
self.prev.append(i)
pass
pass
def build_lines_data(self, co, n):
"""
Initializes self.lines and self.linesstartoffsets
"""
self.lines = []
linetuple = namedtuple('linetuple', ['l_no', 'next'])
# linestarts is a tuple of (offset, line number).
# Turn that in a has that we can index
linestarts = list(dis.findlinestarts(co))
self.linestartoffsets = {}
for offset, lineno in linestarts:
self.linestartoffsets[offset] = lineno
j = 0
(prev_start_byte, prev_line_no) = linestarts[0]
for (start_byte, line_no) in linestarts[1:]:
while j < start_byte:
self.lines.append(linetuple(prev_line_no, start_byte))
j += 1
prev_line_no = start_byte
while j < n:
self.lines.append(linetuple(prev_line_no, n))
j+=1
return
def build_stmt_indices(self):
code = self.code
start = 0
end = len(code)
stmt_opcodes = set([
self.opc.SETUP_LOOP, self.opc.BREAK_LOOP, self.opc.CONTINUE_LOOP,
self.opc.SETUP_FINALLY, self.opc.END_FINALLY,
self.opc.SETUP_EXCEPT, self.opc.SETUP_WITH,
self.opc.POP_BLOCK, self.opc.STORE_FAST, self.opc.DELETE_FAST,
self.opc.STORE_DEREF, self.opc.STORE_GLOBAL,
self.opc.DELETE_GLOBAL, self.opc.STORE_NAME,
self.opc.DELETE_NAME, self.opc.STORE_ATTR,
self.opc.DELETE_ATTR,
## FIXME keep going.
STORE_SUBSCR, DELETE_SUBSCR,
RETURN_VALUE, RAISE_VARARGS, POP_TOP,
PRINT_EXPR, PRINT_ITEM, PRINT_NEWLINE, PRINT_ITEM_TO, PRINT_NEWLINE_TO,
STORE_SLICE_0, STORE_SLICE_1, STORE_SLICE_2, STORE_SLICE_3,
DELETE_SLICE_0, DELETE_SLICE_1, DELETE_SLICE_2, DELETE_SLICE_3,
JUMP_ABSOLUTE, EXEC_STMT,
])
stmt_opcode_seqs = [(self.opc.PJIF, self.opc.JF),
(self.opc.PJIF, self.opc.JA),
(self.opc.PJIT, self.opc.JF),
(self.opc.PJIT, self.opc.JA)]
designator_ops = set([
self.opc.STORE_FAST, STORE_NAME, STORE_GLOBAL, STORE_DEREF, STORE_ATTR,
STORE_SLICE_0, STORE_SLICE_1, STORE_SLICE_2, STORE_SLICE_3,
STORE_SUBSCR, UNPACK_SEQUENCE, JA
])
prelim = self.all_instr(start, end, stmt_opcodes)
stmts = self.stmts = set(prelim)
pass_stmts = set()
for seq in stmt_opcode_seqs:
for i in self.op_range(start, end-(len(seq)+1)):
match = True
for elem in seq:
if elem != code[i]:
match = False
break
i += self.op_size(code[i])
if match:
i = self.prev[i]
stmts.add(i)
pass_stmts.add(i)
if pass_stmts:
stmt_list = list(stmts)
stmt_list.sort()
else:
stmt_list = prelim
last_stmt = -1
self.next_stmt = []
slist = self.next_stmt = []
i = 0
for s in stmt_list:
if code[s] == JA and s not in pass_stmts:
target = self.get_target(s)
if target > s or self.lines[last_stmt].l_no == self.lines[s].l_no:
stmts.remove(s)
continue
j = self.prev[s]
while code[j] == JA:
j = self.prev[j]
if code[j] == self.opc.LIST_APPEND: # list comprehension
stmts.remove(s)
continue
elif code[s] == self.opc.POP_TOP and code[self.prev[s]] == self.opc.ROT_TWO:
stmts.remove(s)
continue
elif code[s] in designator_ops:
j = self.prev[s]
while code[j] in designator_ops:
j = self.prev[j]
if code[j] == self.opc.FOR_ITER:
stmts.remove(s)
continue
last_stmt = s
slist += [s] * (s-i)
i = s
slist += [end] * (end-len(slist))
def next_except_jump(self, start):
'''
Return the next jump that was generated by an except SomeException:
construct in a try...except...else clause or None if not found.
'''
if self.code[start] == DUP_TOP:
except_match = self.first_instr(start, len(self.code), POP_JUMP_IF_FALSE)
if except_match:
jmp = self.prev[self.get_target(except_match)]
self.ignore_if.add(except_match)
self.not_continue.add(jmp)
return jmp
count_END_FINALLY = 0
count_SETUP_ = 0
for i in self.op_range(start, len(self.code)):
op = self.code[i]
if op == END_FINALLY:
if count_END_FINALLY == count_SETUP_:
assert self.code[self.prev[i]] in (JA, JF, RETURN_VALUE)
self.not_continue.add(self.prev[i])
return self.prev[i]
count_END_FINALLY += 1
elif op in (SETUP_EXCEPT, SETUP_WITH, SETUP_FINALLY):
count_SETUP_ += 1
def detect_structure(self, pos, op=None):
'''
Detect type of block structures and their boundaries to fix optimized jumps
in python2.3+
'''
# TODO: check the struct boundaries more precisely -Dan
code = self.code
# Ev remove this test and make op a mandatory argument -Dan
if op is None:
op = code[pos]
# Detect parent structure
parent = self.structs[0]
start = parent['start']
end = parent['end']
for s in self.structs:
_start = s['start']
_end = s['end']
if (_start <= pos < _end) and (_start >= start and _end <= end):
start = _start
end = _end
parent = s
if op == SETUP_LOOP:
start = pos+3
target = self.get_target(pos, op)
end = self.restrict_to_parent(target, parent)
if target != end:
self.fixed_jumps[pos] = end
(line_no, next_line_byte) = self.lines[pos]
jump_back = self.last_instr(start, end, JA,
next_line_byte, False)
if jump_back and jump_back != self.prev[end] and code[jump_back+3] in (JA, JF):
if code[self.prev[end]] == RETURN_VALUE or \
(code[self.prev[end]] == POP_BLOCK and code[self.prev[self.prev[end]]] == RETURN_VALUE):
jump_back = None
if not jump_back: # loop suite ends in return. wtf right?
jump_back = self.last_instr(start, end, RETURN_VALUE) + 1
if not jump_back:
return
if code[self.prev[next_line_byte]] not in (PJIF, PJIT):
loop_type = 'for'
else:
loop_type = 'while'
self.ignore_if.add(self.prev[next_line_byte])
target = next_line_byte
end = jump_back + 3
else:
if self.get_target(jump_back) >= next_line_byte:
jump_back = self.last_instr(start, end, JA, start, False)
if end > jump_back+4 and code[end] in (JF, JA):
if code[jump_back+4] in (JA, JF):
if self.get_target(jump_back+4) == self.get_target(end):
self.fixed_jumps[pos] = jump_back+4
end = jump_back+4
elif target < pos:
self.fixed_jumps[pos] = jump_back+4
end = jump_back+4
target = self.get_target(jump_back, JA)
if code[target] in (FOR_ITER, GET_ITER):
loop_type = 'for'
else:
loop_type = 'while'
test = self.prev[next_line_byte]
if test == pos:
loop_type = 'while 1'
elif self.code[test] in hasjabs+hasjrel:
self.ignore_if.add(test)
test_target = self.get_target(test)
if test_target > (jump_back+3):
jump_back = test_target
self.not_continue.add(jump_back)
self.loops.append(target)
self.structs.append({'type': loop_type + '-loop',
'start': target,
'end': jump_back})
if jump_back+3 != end:
self.structs.append({'type': loop_type + '-else',
'start': jump_back+3,
'end': end})
elif op == SETUP_EXCEPT:
start = pos+3
target = self.get_target(pos, op)
end = self.restrict_to_parent(target, parent)
if target != end:
self.fixed_jumps[pos] = end
# print target, end, parent
# Add the try block
self.structs.append({'type': 'try',
'start': start,
'end': end-4})
# Now isolate the except and else blocks
end_else = start_else = self.get_target(self.prev[end])
# Add the except blocks
i = end
while self.code[i] != END_FINALLY:
jmp = self.next_except_jump(i)
if self.code[jmp] == RETURN_VALUE:
self.structs.append({'type': 'except',
'start': i,
'end': jmp+1})
i = jmp + 1
else:
if self.get_target(jmp) != start_else:
end_else = self.get_target(jmp)
if self.code[jmp] == JF:
self.fixed_jumps[jmp] = -1
self.structs.append({'type': 'except',
'start': i,
'end': jmp})
i = jmp + 3
# Add the try-else block
if end_else != start_else:
r_end_else = self.restrict_to_parent(end_else, parent)
self.structs.append({'type': 'try-else',
'start': i+1,
'end': r_end_else})
self.fixed_jumps[i] = r_end_else
else:
self.fixed_jumps[i] = i+1
elif op in (PJIF, PJIT):
start = pos+3
target = self.get_target(pos, op)
rtarget = self.restrict_to_parent(target, parent)
pre = self.prev
if target != rtarget and parent['type'] == 'and/or':
self.fixed_jumps[pos] = rtarget
return
# does this jump to right after another cond jump?
# if so, it's part of a larger conditional
if (code[pre[target]] in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP,
PJIF, PJIT)) and (target > pos):
self.fixed_jumps[pos] = pre[target]
self.structs.append({'type': 'and/or',
'start': start,
'end': pre[target]})
return
# is this an if and
if op == PJIF:
match = self.rem_or(start, self.next_stmt[pos], PJIF, target)
match = self.remove_mid_line_ifs(match)
if match:
if code[pre[rtarget]] in (JF, JA) \
and pre[rtarget] not in self.stmts \
and self.restrict_to_parent(self.get_target(pre[rtarget]), parent) == rtarget:
if code[pre[pre[rtarget]]] == JA \
and self.remove_mid_line_ifs([pos]) \
and target == self.get_target(pre[pre[rtarget]]) \
and (pre[pre[rtarget]] not in self.stmts or self.get_target(pre[pre[rtarget]]) > pre[pre[rtarget]])\
and 1 == len(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], (PJIF, PJIT), target))):
pass
elif code[pre[pre[rtarget]]] == RETURN_VALUE \
and self.remove_mid_line_ifs([pos]) \
and 1 == (len(set(self.remove_mid_line_ifs(self.rem_or(start,
pre[pre[rtarget]],
(PJIF, PJIT), target)))
| set(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]],
(PJIF, PJIT, JA), pre[rtarget], True))))):
pass
else:
fix = None
jump_ifs = self.all_instr(start, self.next_stmt[pos], PJIF)
last_jump_good = True
for j in jump_ifs:
if target == self.get_target(j):
if self.lines[j].next == j+3 and last_jump_good:
fix = j
break
else:
last_jump_good = False
self.fixed_jumps[pos] = fix or match[-1]
return
else:
self.fixed_jumps[pos] = match[-1]
return
else: # op == PJIT
if (pos+3) in self.load_asserts:
if code[pre[rtarget]] == RAISE_VARARGS:
return
self.load_asserts.remove(pos+3)
next = self.next_stmt[pos]
if pre[next] == pos:
pass
elif code[next] in (JF, JA) and target == self.get_target(next):
if code[pre[next]] == PJIF:
if code[next] == JF or target != rtarget or code[pre[pre[rtarget]]] not in (JA, RETURN_VALUE):
self.fixed_jumps[pos] = pre[next]
return
elif code[next] == JA and code[target] in (JA, JF):
next_target = self.get_target(next)
if self.get_target(target) == next_target:
self.fixed_jumps[pos] = pre[next]
return
elif code[next_target] in (JA, JF) and self.get_target(next_target) == self.get_target(target):
self.fixed_jumps[pos] = pre[next]
return
# don't add a struct for a while test, it's already taken care of
if pos in self.ignore_if:
return
if code[pre[rtarget]] == JA and pre[rtarget] in self.stmts \
and pre[rtarget] != pos and pre[pre[rtarget]] != pos:
if code[rtarget] == JA and code[rtarget+3] == POP_BLOCK:
if code[pre[pre[rtarget]]] != JA:
pass
elif self.get_target(pre[pre[rtarget]]) != target:
pass
else:
rtarget = pre[rtarget]
else:
rtarget = pre[rtarget]
# does the if jump just beyond a jump op, then this is probably an if statement
if code[pre[rtarget]] in (JA, JF):
if_end = self.get_target(pre[rtarget])
# is this a loop not an if?
if (if_end < pre[rtarget]) and (code[pre[if_end]] == SETUP_LOOP):
if(if_end > start):
return
end = self.restrict_to_parent(if_end, parent)
self.structs.append({'type': 'if-then',
'start': start,
'end': pre[rtarget]})
self.not_continue.add(pre[rtarget])
if rtarget < end:
self.structs.append({'type': 'if-else',
'start': rtarget,
'end': end})
elif code[pre[rtarget]] == RETURN_VALUE:
self.structs.append({'type': 'if-then',
'start': start,
'end': rtarget})
self.return_end_ifs.add(pre[rtarget])
elif op in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP):
target = self.get_target(pos, op)
self.fixed_jumps[pos] = self.restrict_to_parent(target, parent)
def find_jump_targets(self):
'''
Detect all offsets in a byte code which are jump targets.
Return the list of offsets.
This procedure is modelled after dis.findlabels(), but here
for each target the number of jumps are counted.
'''
n = len(self.code)
self.structs = [{'type': 'root',
'start': 0,
'end': n-1}]
self.loops = [] # All loop entry points
self.fixed_jumps = {} # Map fixed jumps to their real destination
self.ignore_if = set()
self.build_stmt_indices()
# Containers filled by detect_structure()
self.not_continue = set()
self.return_end_ifs = set()
targets = {}
for i in self.op_range(0, n):
op = self.code[i]
# Determine structures and fix jumps in Python versions
# since 2.3
self.detect_structure(i, op)
if op >= HAVE_ARGUMENT:
label = self.fixed_jumps.get(i)
oparg = self.code[i+1] + self.code[i+2] * 256
if label is None:
if op in hasjrel and op != FOR_ITER:
label = i + 3 + oparg
elif op in hasjabs:
if op in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP):
if (oparg > i):
label = oparg
if label is not None and label != -1:
targets[label] = targets.get(label, []) + [i]
elif op == END_FINALLY and i in self.fixed_jumps:
label = self.fixed_jumps[i]
targets[label] = targets.get(label, []) + [i]
return targets
if __name__ == "__main__":
from uncompyle6 import PYTHON_VERSION
if PYTHON_VERSION >= 2.3:
co = inspect.currentframe().f_code
from uncompyle6 import PYTHON_VERSION
tokens, customize = Scanner2(PYTHON_VERSION).disassemble(co)
for t in tokens:
print(t.format())
else:
print("Need to be Python 3.2 or greater to demo; I am %s." %
PYTHON_VERSION)
pass

View File

@@ -16,11 +16,11 @@ from array import array
import dis import dis
from uncompyle6.opcodes.opcode_25 import * from uncompyle6.opcodes.opcode_25 import *
import uncompyle6.scanner as scan import uncompyle6.scanners.scanner2 as scan
class Scanner25(scan.Scanner): class Scanner25(scan.Scanner2):
def __init__(self): def __init__(self):
scan.Scanner.__init__(self, 2.5) super(Scanner25, self).__init__(2.5)
def disassemble(self, co, classname=None, code_objects={}): def disassemble(self, co, classname=None, code_objects={}):
''' '''

View File

@@ -16,11 +16,11 @@ from array import array
from uncompyle6.opcodes.opcode_26 import * from uncompyle6.opcodes.opcode_26 import *
import dis import dis
import uncompyle6.scanner as scan import uncompyle6.scanners.scanner2 as scan
class Scanner26(scan.Scanner): class Scanner26(scan.Scanner2):
def __init__(self): def __init__(self):
scan.Scanner.__init__(self, 2.6) super(Scanner26, self).__init__(2.6)
def disassemble(self, co, classname=None, code_objects={}): def disassemble(self, co, classname=None, code_objects={}):
''' '''

View File

@@ -1,7 +1,4 @@
# Copyright (c) 2015, 2016 by Rocky Bernstein # Copyright (c) 2015, 2016 by Rocky Bernstein
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
# Copyright (c) 1999 John Aycock
""" """
Python 2.7 bytecode scanner/deparser Python 2.7 bytecode scanner/deparser
@@ -13,722 +10,27 @@ for later use in deparsing.
from __future__ import print_function from __future__ import print_function
import dis, inspect from uncompyle6.scanners.scanner2 import Scanner2
from collections import namedtuple
from array import array
from xdis.code import iscode # bytecode verification, verify(), uses JUMP_OPs from here
from uncompyle6.opcodes.opcode_27 import * # NOQA from xdis.opcodes import opcode_27
import uncompyle6.scanner as scan JUMP_OPs = opcode_27.JUMP_OPs
class Scanner27(scan.Scanner): class Scanner27(Scanner2):
def __init__(self): def __init__(self):
scan.Scanner.__init__(self, 2.7) super(Scanner27, self).__init__(2.7)
def disassemble(self, co, classname=None, code_objects={}):
"""
Disassemble a Python 3 ode object, returning a list of 'Token'.
Various tranformations are made to assist the deparsing grammar.
For example:
- various types of LOAD_CONST's are categorized in terms of what they load
- COME_FROM instructions are added to assist parsing control structures
- MAKE_FUNCTION and FUNCTION_CALLS append the number of positional aruments
The main part of this procedure is modelled after
dis.disassemble().
"""
# import dis; dis.disassemble(co) # DEBUG
# Container for tokens
tokens = []
customize = {}
Token = self.Token # shortcut
n = self.setup_code(co)
self.build_lines_data(co, n)
self.build_prev_op(n)
# self.lines contains (block,addrLastInstr)
if classname:
classname = '_' + classname.lstrip('_') + '__'
def unmangle(name):
if name.startswith(classname) and name[-2:] != '__':
return name[len(classname) - 2:]
return name
free = [ unmangle(name) for name in (co.co_cellvars + co.co_freevars) ]
names = [ unmangle(name) for name in co.co_names ]
varnames = [ unmangle(name) for name in co.co_varnames ]
else:
free = co.co_cellvars + co.co_freevars
names = co.co_names
varnames = co.co_varnames
self.load_asserts = set()
for i in self.op_range(0, n):
if self.code[i] == PJIT and self.code[i+3] == LOAD_GLOBAL:
if names[self.get_argument(i+3)] == 'AssertionError':
self.load_asserts.add(i+3)
cf = self.find_jump_targets()
# contains (code, [addrRefToCode])
last_stmt = self.next_stmt[0]
i = self.next_stmt[last_stmt]
replace = {}
while i < n-1:
if self.lines[last_stmt].next > i:
if self.code[last_stmt] == PRINT_ITEM:
if self.code[i] == PRINT_ITEM:
replace[i] = 'PRINT_ITEM_CONT'
elif self.code[i] == PRINT_NEWLINE:
replace[i] = 'PRINT_NEWLINE_CONT'
last_stmt = i
i = self.next_stmt[i]
imports = self.all_instr(0, n, (IMPORT_NAME, IMPORT_FROM, IMPORT_STAR))
if len(imports) > 1:
last_import = imports[0]
for i in imports[1:]:
if self.lines[last_import].next > i:
if self.code[last_import] == IMPORT_NAME == self.code[i]:
replace[i] = 'IMPORT_NAME_CONT'
last_import = i
extended_arg = 0
for offset in self.op_range(0, n):
if offset in cf:
k = 0
for j in cf[offset]:
tokens.append(Token('COME_FROM', None, repr(j),
offset="%s_%d" % (offset, k)))
k += 1
op = self.code[offset]
op_name = opname[op]
oparg = None; pattr = None
if op >= HAVE_ARGUMENT:
oparg = self.get_argument(offset) + extended_arg
extended_arg = 0
if op == EXTENDED_ARG:
extended_arg = oparg * scan.L65536
continue
if op in hasconst:
const = co.co_consts[oparg]
if iscode(const):
oparg = const
if const.co_name == '<lambda>':
assert op_name == 'LOAD_CONST'
op_name = 'LOAD_LAMBDA'
elif const.co_name == '<genexpr>':
op_name = 'LOAD_GENEXPR'
elif const.co_name == '<dictcomp>':
op_name = 'LOAD_DICTCOMP'
elif const.co_name == '<setcomp>':
op_name = 'LOAD_SETCOMP'
# verify() uses 'pattr' for comparison, since 'attr'
# now holds Code(const) and thus can not be used
# for comparison (todo: think about changing this)
# pattr = 'code_object @ 0x%x %s->%s' %\
# (id(const), const.co_filename, const.co_name)
pattr = '<code_object ' + const.co_name + '>'
else:
pattr = const
elif op in hasname:
pattr = names[oparg]
elif op in hasjrel:
pattr = repr(offset + 3 + oparg)
elif op in hasjabs:
pattr = repr(oparg)
elif op in haslocal:
pattr = varnames[oparg]
elif op in hascompare:
pattr = cmp_op[oparg]
elif op in hasfree:
pattr = free[oparg]
if op in (BUILD_LIST, BUILD_TUPLE, BUILD_SET, BUILD_SLICE,
UNPACK_SEQUENCE,
MAKE_FUNCTION, CALL_FUNCTION, MAKE_CLOSURE,
CALL_FUNCTION_VAR, CALL_FUNCTION_KW,
CALL_FUNCTION_VAR_KW, DUP_TOPX, RAISE_VARARGS
):
# CE - Hack for >= 2.5
# Now all values loaded via LOAD_CLOSURE are packed into
# a tuple before calling MAKE_CLOSURE.
if op == BUILD_TUPLE and \
self.code[self.prev[offset]] == LOAD_CLOSURE:
continue
else:
op_name = '%s_%d' % (op_name, oparg)
if op != BUILD_SLICE:
customize[op_name] = oparg
elif op == JA:
target = self.get_target(offset)
if target < offset:
if offset in self.stmts and self.code[offset+3] not in (END_FINALLY, POP_BLOCK) \
and offset not in self.not_continue:
op_name = 'CONTINUE'
else:
op_name = 'JUMP_BACK'
elif op == LOAD_GLOBAL:
if offset in self.load_asserts:
op_name = 'LOAD_ASSERT'
elif op == RETURN_VALUE:
if offset in self.return_end_ifs:
op_name = 'RETURN_END_IF'
if offset in self.linestartoffsets:
linestart = self.linestartoffsets[offset]
else:
linestart = None
if offset not in replace:
tokens.append(Token(op_name, oparg, pattr, offset, linestart))
else:
tokens.append(Token(replace[offset], oparg, pattr, offset, linestart))
return tokens, customize
def disassemble_native(self, co, classname=None, code_objects={}):
"""
Like disassemble3 but doesn't try to adjust any opcodes.
"""
# Container for tokens
tokens = []
customize = {}
Token = self.Token # shortcut
n = self.setup_code(co)
self.build_lines_data(co, n)
# self.lines contains (block,addrLastInstr)
if classname:
classname = '_' + classname.lstrip('_') + '__'
def unmangle(name):
if name.startswith(classname) and name[-2:] != '__':
return name[len(classname) - 2:]
return name
free = [ unmangle(name) for name in (co.co_cellvars + co.co_freevars) ]
names = [ unmangle(name) for name in co.co_names ]
varnames = [ unmangle(name) for name in co.co_varnames ]
else:
free = co.co_cellvars + co.co_freevars
names = co.co_names
varnames = co.co_varnames
extended_arg = 0
for offset in self.op_range(0, n):
op = self.code[offset]
op_name = opname[op]
oparg = None; pattr = None
if op >= HAVE_ARGUMENT:
oparg = self.get_argument(offset) + extended_arg
extended_arg = 0
if op == EXTENDED_ARG:
extended_arg = oparg * scan.L65536
continue
if op in hasconst:
pattr = co.co_consts[oparg]
elif op in hasname:
pattr = names[oparg]
elif op in hasjrel:
pattr = repr(offset + 3 + oparg)
elif op in hasjabs:
pattr = repr(oparg)
elif op in haslocal:
pattr = varnames[oparg]
elif op in hascompare:
pattr = cmp_op[oparg]
elif op in hasfree:
pattr = free[oparg]
if offset in self.linestartoffsets:
linestart = self.linestartoffsets[offset]
else:
linestart = None
tokens.append(Token(op_name, oparg, pattr, offset, linestart))
pass
return tokens, customize
def setup_code(self, co):
"""
Creates Python-independent bytecode structure (byte array) in
self.code and records previous instruction in self.prev
The size of self.code is returned
"""
self.code = array('B', co.co_code)
n = -1
for i in self.op_range(0, len(self.code)):
if self.code[i] in (RETURN_VALUE, END_FINALLY):
n = i + 1
pass
pass
assert n > -1, "Didn't find RETURN_VALUE or END_FINALLY FINALLY"
self.code = array('B', co.co_code[:n])
return n
def build_prev_op(self, n):
self.prev = [0]
# mapping addresses of instruction & argument
for i in self.op_range(0, n):
op = self.code[i]
self.prev.append(i)
if op >= HAVE_ARGUMENT:
self.prev.append(i)
self.prev.append(i)
pass
pass
def build_lines_data(self, co, n):
"""
Initializes self.lines and self.linesstartoffsets
"""
self.lines = []
linetuple = namedtuple('linetuple', ['l_no', 'next'])
# linestarts is a tuple of (offset, line number).
# Turn that in a has that we can index
linestarts = list(dis.findlinestarts(co))
self.linestartoffsets = {}
for offset, lineno in linestarts:
self.linestartoffsets[offset] = lineno
j = 0
(prev_start_byte, prev_line_no) = linestarts[0]
for (start_byte, line_no) in linestarts[1:]:
while j < start_byte:
self.lines.append(linetuple(prev_line_no, start_byte))
j += 1
prev_line_no = start_byte
while j < n:
self.lines.append(linetuple(prev_line_no, n))
j+=1
return return
pass
def build_stmt_indices(self):
code = self.code
start = 0
end = len(code)
stmt_opcodes = set([
SETUP_LOOP, BREAK_LOOP, CONTINUE_LOOP,
SETUP_FINALLY, END_FINALLY, SETUP_EXCEPT, SETUP_WITH,
POP_BLOCK, STORE_FAST, DELETE_FAST, STORE_DEREF,
STORE_GLOBAL, DELETE_GLOBAL, STORE_NAME, DELETE_NAME,
STORE_ATTR, DELETE_ATTR, STORE_SUBSCR, DELETE_SUBSCR,
RETURN_VALUE, RAISE_VARARGS, POP_TOP,
PRINT_EXPR, PRINT_ITEM, PRINT_NEWLINE, PRINT_ITEM_TO, PRINT_NEWLINE_TO,
STORE_SLICE_0, STORE_SLICE_1, STORE_SLICE_2, STORE_SLICE_3,
DELETE_SLICE_0, DELETE_SLICE_1, DELETE_SLICE_2, DELETE_SLICE_3,
JUMP_ABSOLUTE, EXEC_STMT,
])
stmt_opcode_seqs = [(PJIF, JF), (PJIF, JA), (PJIT, JF), (PJIT, JA)]
designator_ops = set([
STORE_FAST, STORE_NAME, STORE_GLOBAL, STORE_DEREF, STORE_ATTR,
STORE_SLICE_0, STORE_SLICE_1, STORE_SLICE_2, STORE_SLICE_3,
STORE_SUBSCR, UNPACK_SEQUENCE, JA
])
prelim = self.all_instr(start, end, stmt_opcodes)
stmts = self.stmts = set(prelim)
pass_stmts = set()
for seq in stmt_opcode_seqs:
for i in self.op_range(start, end-(len(seq)+1)):
match = True
for elem in seq:
if elem != code[i]:
match = False
break
i += self.op_size(code[i])
if match:
i = self.prev[i]
stmts.add(i)
pass_stmts.add(i)
if pass_stmts:
stmt_list = list(stmts)
stmt_list.sort()
else:
stmt_list = prelim
last_stmt = -1
self.next_stmt = []
slist = self.next_stmt = []
i = 0
for s in stmt_list:
if code[s] == JA and s not in pass_stmts:
target = self.get_target(s)
if target > s or self.lines[last_stmt].l_no == self.lines[s].l_no:
stmts.remove(s)
continue
j = self.prev[s]
while code[j] == JA:
j = self.prev[j]
if code[j] == LIST_APPEND: # list comprehension
stmts.remove(s)
continue
elif code[s] == POP_TOP and code[self.prev[s]] == ROT_TWO:
stmts.remove(s)
continue
elif code[s] in designator_ops:
j = self.prev[s]
while code[j] in designator_ops:
j = self.prev[j]
if code[j] == FOR_ITER:
stmts.remove(s)
continue
last_stmt = s
slist += [s] * (s-i)
i = s
slist += [end] * (end-len(slist))
def next_except_jump(self, start):
'''
Return the next jump that was generated by an except SomeException:
construct in a try...except...else clause or None if not found.
'''
if self.code[start] == DUP_TOP:
except_match = self.first_instr(start, len(self.code), POP_JUMP_IF_FALSE)
if except_match:
jmp = self.prev[self.get_target(except_match)]
self.ignore_if.add(except_match)
self.not_continue.add(jmp)
return jmp
count_END_FINALLY = 0
count_SETUP_ = 0
for i in self.op_range(start, len(self.code)):
op = self.code[i]
if op == END_FINALLY:
if count_END_FINALLY == count_SETUP_:
assert self.code[self.prev[i]] in (JA, JF, RETURN_VALUE)
self.not_continue.add(self.prev[i])
return self.prev[i]
count_END_FINALLY += 1
elif op in (SETUP_EXCEPT, SETUP_WITH, SETUP_FINALLY):
count_SETUP_ += 1
def detect_structure(self, pos, op=None):
'''
Detect type of block structures and their boundaries to fix optimized jumps
in python2.3+
'''
# TODO: check the struct boundaries more precisely -Dan
code = self.code
# Ev remove this test and make op a mandatory argument -Dan
if op is None:
op = code[pos]
# Detect parent structure
parent = self.structs[0]
start = parent['start']
end = parent['end']
for s in self.structs:
_start = s['start']
_end = s['end']
if (_start <= pos < _end) and (_start >= start and _end <= end):
start = _start
end = _end
parent = s
if op == SETUP_LOOP:
start = pos+3
target = self.get_target(pos, op)
end = self.restrict_to_parent(target, parent)
if target != end:
self.fixed_jumps[pos] = end
(line_no, next_line_byte) = self.lines[pos]
jump_back = self.last_instr(start, end, JA,
next_line_byte, False)
if jump_back and jump_back != self.prev[end] and code[jump_back+3] in (JA, JF):
if code[self.prev[end]] == RETURN_VALUE or \
(code[self.prev[end]] == POP_BLOCK and code[self.prev[self.prev[end]]] == RETURN_VALUE):
jump_back = None
if not jump_back: # loop suite ends in return. wtf right?
jump_back = self.last_instr(start, end, RETURN_VALUE) + 1
if not jump_back:
return
if code[self.prev[next_line_byte]] not in (PJIF, PJIT):
loop_type = 'for'
else:
loop_type = 'while'
self.ignore_if.add(self.prev[next_line_byte])
target = next_line_byte
end = jump_back + 3
else:
if self.get_target(jump_back) >= next_line_byte:
jump_back = self.last_instr(start, end, JA, start, False)
if end > jump_back+4 and code[end] in (JF, JA):
if code[jump_back+4] in (JA, JF):
if self.get_target(jump_back+4) == self.get_target(end):
self.fixed_jumps[pos] = jump_back+4
end = jump_back+4
elif target < pos:
self.fixed_jumps[pos] = jump_back+4
end = jump_back+4
target = self.get_target(jump_back, JA)
if code[target] in (FOR_ITER, GET_ITER):
loop_type = 'for'
else:
loop_type = 'while'
test = self.prev[next_line_byte]
if test == pos:
loop_type = 'while 1'
elif self.code[test] in hasjabs+hasjrel:
self.ignore_if.add(test)
test_target = self.get_target(test)
if test_target > (jump_back+3):
jump_back = test_target
self.not_continue.add(jump_back)
self.loops.append(target)
self.structs.append({'type': loop_type + '-loop',
'start': target,
'end': jump_back})
if jump_back+3 != end:
self.structs.append({'type': loop_type + '-else',
'start': jump_back+3,
'end': end})
elif op == SETUP_EXCEPT:
start = pos+3
target = self.get_target(pos, op)
end = self.restrict_to_parent(target, parent)
if target != end:
self.fixed_jumps[pos] = end
# print target, end, parent
# Add the try block
self.structs.append({'type': 'try',
'start': start,
'end': end-4})
# Now isolate the except and else blocks
end_else = start_else = self.get_target(self.prev[end])
# Add the except blocks
i = end
while self.code[i] != END_FINALLY:
jmp = self.next_except_jump(i)
if self.code[jmp] == RETURN_VALUE:
self.structs.append({'type': 'except',
'start': i,
'end': jmp+1})
i = jmp + 1
else:
if self.get_target(jmp) != start_else:
end_else = self.get_target(jmp)
if self.code[jmp] == JF:
self.fixed_jumps[jmp] = -1
self.structs.append({'type': 'except',
'start': i,
'end': jmp})
i = jmp + 3
# Add the try-else block
if end_else != start_else:
r_end_else = self.restrict_to_parent(end_else, parent)
self.structs.append({'type': 'try-else',
'start': i+1,
'end': r_end_else})
self.fixed_jumps[i] = r_end_else
else:
self.fixed_jumps[i] = i+1
elif op in (PJIF, PJIT):
start = pos+3
target = self.get_target(pos, op)
rtarget = self.restrict_to_parent(target, parent)
pre = self.prev
if target != rtarget and parent['type'] == 'and/or':
self.fixed_jumps[pos] = rtarget
return
# does this jump to right after another cond jump?
# if so, it's part of a larger conditional
if (code[pre[target]] in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP,
PJIF, PJIT)) and (target > pos):
self.fixed_jumps[pos] = pre[target]
self.structs.append({'type': 'and/or',
'start': start,
'end': pre[target]})
return
# is this an if and
if op == PJIF:
match = self.rem_or(start, self.next_stmt[pos], PJIF, target)
match = self.remove_mid_line_ifs(match)
if match:
if code[pre[rtarget]] in (JF, JA) \
and pre[rtarget] not in self.stmts \
and self.restrict_to_parent(self.get_target(pre[rtarget]), parent) == rtarget:
if code[pre[pre[rtarget]]] == JA \
and self.remove_mid_line_ifs([pos]) \
and target == self.get_target(pre[pre[rtarget]]) \
and (pre[pre[rtarget]] not in self.stmts or self.get_target(pre[pre[rtarget]]) > pre[pre[rtarget]])\
and 1 == len(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], (PJIF, PJIT), target))):
pass
elif code[pre[pre[rtarget]]] == RETURN_VALUE \
and self.remove_mid_line_ifs([pos]) \
and 1 == (len(set(self.remove_mid_line_ifs(self.rem_or(start,
pre[pre[rtarget]],
(PJIF, PJIT), target)))
| set(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]],
(PJIF, PJIT, JA), pre[rtarget], True))))):
pass
else:
fix = None
jump_ifs = self.all_instr(start, self.next_stmt[pos], PJIF)
last_jump_good = True
for j in jump_ifs:
if target == self.get_target(j):
if self.lines[j].next == j+3 and last_jump_good:
fix = j
break
else:
last_jump_good = False
self.fixed_jumps[pos] = fix or match[-1]
return
else:
self.fixed_jumps[pos] = match[-1]
return
else: # op == PJIT
if (pos+3) in self.load_asserts:
if code[pre[rtarget]] == RAISE_VARARGS:
return
self.load_asserts.remove(pos+3)
next = self.next_stmt[pos]
if pre[next] == pos:
pass
elif code[next] in (JF, JA) and target == self.get_target(next):
if code[pre[next]] == PJIF:
if code[next] == JF or target != rtarget or code[pre[pre[rtarget]]] not in (JA, RETURN_VALUE):
self.fixed_jumps[pos] = pre[next]
return
elif code[next] == JA and code[target] in (JA, JF):
next_target = self.get_target(next)
if self.get_target(target) == next_target:
self.fixed_jumps[pos] = pre[next]
return
elif code[next_target] in (JA, JF) and self.get_target(next_target) == self.get_target(target):
self.fixed_jumps[pos] = pre[next]
return
# don't add a struct for a while test, it's already taken care of
if pos in self.ignore_if:
return
if code[pre[rtarget]] == JA and pre[rtarget] in self.stmts \
and pre[rtarget] != pos and pre[pre[rtarget]] != pos:
if code[rtarget] == JA and code[rtarget+3] == POP_BLOCK:
if code[pre[pre[rtarget]]] != JA:
pass
elif self.get_target(pre[pre[rtarget]]) != target:
pass
else:
rtarget = pre[rtarget]
else:
rtarget = pre[rtarget]
# does the if jump just beyond a jump op, then this is probably an if statement
if code[pre[rtarget]] in (JA, JF):
if_end = self.get_target(pre[rtarget])
# is this a loop not an if?
if (if_end < pre[rtarget]) and (code[pre[if_end]] == SETUP_LOOP):
if(if_end > start):
return
end = self.restrict_to_parent(if_end, parent)
self.structs.append({'type': 'if-then',
'start': start,
'end': pre[rtarget]})
self.not_continue.add(pre[rtarget])
if rtarget < end:
self.structs.append({'type': 'if-else',
'start': rtarget,
'end': end})
elif code[pre[rtarget]] == RETURN_VALUE:
self.structs.append({'type': 'if-then',
'start': start,
'end': rtarget})
self.return_end_ifs.add(pre[rtarget])
elif op in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP):
target = self.get_target(pos, op)
self.fixed_jumps[pos] = self.restrict_to_parent(target, parent)
def find_jump_targets(self):
'''
Detect all offsets in a byte code which are jump targets.
Return the list of offsets.
This procedure is modelled after dis.findlabels(), but here
for each target the number of jumps are counted.
'''
n = len(self.code)
self.structs = [{'type': 'root',
'start': 0,
'end': n-1}]
self.loops = [] # All loop entry points
self.fixed_jumps = {} # Map fixed jumps to their real destination
self.ignore_if = set()
self.build_stmt_indices()
# Containers filled by detect_structure()
self.not_continue = set()
self.return_end_ifs = set()
targets = {}
for i in self.op_range(0, n):
op = self.code[i]
# Determine structures and fix jumps in Python versions
# since 2.3
self.detect_structure(i, op)
if op >= HAVE_ARGUMENT:
label = self.fixed_jumps.get(i)
oparg = self.code[i+1] + self.code[i+2] * 256
if label is None:
if op in hasjrel and op != FOR_ITER:
label = i + 3 + oparg
elif op in hasjabs:
if op in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP):
if (oparg > i):
label = oparg
if label is not None and label != -1:
targets[label] = targets.get(label, []) + [i]
elif op == END_FINALLY and i in self.fixed_jumps:
label = self.fixed_jumps[i]
targets[label] = targets.get(label, []) + [i]
return targets
if __name__ == "__main__": if __name__ == "__main__":
co = inspect.currentframe().f_code from uncompyle6 import PYTHON_VERSION
tokens, customize = Scanner27().disassemble(co) if PYTHON_VERSION == 2.7:
for t in tokens: import inspect
print(t) co = inspect.currentframe().f_code
pass tokens, customize = Scanner27().disassemble(co)
for t in tokens:
print(t.format())
pass
else:
print("Need to be Python 2.7 to demo; I am %s." %
PYTHON_VERSION)

View File

@@ -1,7 +1,6 @@
# Copyright (c) 2015, 2016 by Rocky Bernstein # Copyright (c) 2015, 2016 by Rocky Bernstein
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org> # Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com> # Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
# Copyright (c) 1999 John Aycock
""" """
Python 3 Generic bytecode scanner/deparser Python 3 Generic bytecode scanner/deparser
@@ -29,8 +28,6 @@ from array import array
from xdis.code import iscode from xdis.code import iscode
from xdis.bytecode import Bytecode, findlinestarts from xdis.bytecode import Bytecode, findlinestarts
from uncompyle6.scanner import Token from uncompyle6.scanner import Token
from uncompyle6 import PYTHON3
# Get all the opcodes into globals # Get all the opcodes into globals
import xdis.opcodes.opcode_33 as op3 import xdis.opcodes.opcode_33 as op3
@@ -205,213 +202,6 @@ class Scanner3(scan.Scanner):
pass pass
return tokens, {} return tokens, {}
def disassemble_generic(self, co, classname=None, code_objects={}):
"""
Convert code object <co> into a sequence of tokens.
The below is based on (an older version?) of Python dis.disassemble_bytes().
"""
# dis.disassemble(co) # DEBUG
# Container for tokens
tokens = []
customize = {}
self.code = code = array('B', co.co_code)
codelen = len(code)
self.build_lines_data(co)
self.build_prev_op()
self.code_objects = code_objects
# self.lines contains (block,addrLastInstr)
if classname:
classname = '_' + classname.lstrip('_') + '__'
def unmangle(name):
if name.startswith(classname) and name[-2:] != '__':
return name[len(classname) - 2:]
return name
free = [ unmangle(name) for name in (co.co_cellvars + co.co_freevars) ]
names = [ unmangle(name) for name in co.co_names ]
varnames = [ unmangle(name) for name in co.co_varnames ]
else:
free = co.co_cellvars + co.co_freevars
names = co.co_names
varnames = co.co_varnames
pass
# Scan for assertions. Later we will
# turn 'LOAD_GLOBAL' to 'LOAD_ASSERT' for those
# assertions
self.load_asserts = set()
for i in self.op_range(0, codelen):
if (self.code[i] == POP_JUMP_IF_TRUE and
self.code[i+3] == LOAD_GLOBAL):
if names[self.get_argument(i+3)] == 'AssertionError':
self.load_asserts.add(i+3)
# Get jump targets
# Format: {target offset: [jump offsets]}
jump_targets = self.find_jump_targets()
# contains (code, [addrRefToCode])
last_stmt = self.next_stmt[0]
i = self.next_stmt[last_stmt]
replace = {}
imports = self.all_instr(0, codelen, (IMPORT_NAME, IMPORT_FROM, IMPORT_STAR))
if len(imports) > 1:
last_import = imports[0]
for i in imports[1:]:
if self.lines[last_import].next > i:
if self.code[last_import] == IMPORT_NAME == self.code[i]:
replace[i] = 'IMPORT_NAME_CONT'
last_import = i
# Initialize extended arg at 0. When extended arg op is encountered,
# variable preserved for next cycle and added as arg for next op
extended_arg = 0
for offset in self.op_range(0, codelen):
# Add jump target tokens
if offset in jump_targets:
jump_idx = 0
for jump_offset in jump_targets[offset]:
tokens.append(Token('COME_FROM', None, repr(jump_offset),
offset='%s_%s' % (offset, jump_idx)))
jump_idx += 1
pass
pass
op = code[offset]
op_name = self.opname[op]
oparg = None; pattr = None
if op >= op3.HAVE_ARGUMENT:
oparg = self.get_argument(offset) + extended_arg
extended_arg = 0
if op == op3.EXTENDED_ARG:
extended_arg = oparg * scan.L65536
continue
if op in op3.hasconst:
const = co.co_consts[oparg]
if not PYTHON3 and isinstance(const, str):
if const in code_objects:
const = code_objects[const]
if iscode(const):
oparg = const
if const.co_name == '<lambda>':
assert op_name == 'LOAD_CONST'
op_name = 'LOAD_LAMBDA'
elif const.co_name == '<genexpr>':
op_name = 'LOAD_GENEXPR'
elif const.co_name == '<dictcomp>':
op_name = 'LOAD_DICTCOMP'
elif const.co_name == '<setcomp>':
op_name = 'LOAD_SETCOMP'
elif const.co_name == '<listcomp>':
op_name = 'LOAD_LISTCOMP'
# verify() uses 'pattr' for comparison, since 'attr'
# now holds Code(const) and thus can not be used
# for comparison (todo: think about changing this)
# pattr = 'code_object @ 0x%x %s->%s' %\
# (id(const), const.co_filename, const.co_name)
pattr = '<code_object ' + const.co_name + '>'
else:
pattr = const
elif op in op3.hasname:
pattr = names[oparg]
elif op in op3.hasjrel:
pattr = repr(offset + 3 + oparg)
elif op in op3.hasjabs:
pattr = repr(oparg)
elif op in op3.haslocal:
pattr = varnames[oparg]
elif op in op3.hascompare:
pattr = op3.cmp_op[oparg]
elif op in op3.hasfree:
pattr = free[oparg]
if op_name == 'MAKE_FUNCTION':
argc = oparg
attr = ((argc & 0xFF), (argc >> 8) & 0xFF, (argc >> 16) & 0x7FFF)
pos_args, name_pair_args, annotate_args = attr
if name_pair_args > 0:
op_name = 'MAKE_FUNCTION_N%d' % name_pair_args
pass
if annotate_args > 0:
op_name = '%s_A_%d' % [op_name, annotate_args]
pass
op_name = '%s_%d' % (op_name, pos_args)
pattr = ("%d positional, %d keyword pair, %d annotated" %
(pos_args, name_pair_args, annotate_args))
tokens.append(
Token(
type_ = op_name,
attr = (pos_args, name_pair_args, annotate_args),
pattr = pattr,
offset = offset,
linestart = linestart)
)
continue
elif op_name in ('BUILD_LIST', 'BUILD_TUPLE', 'BUILD_SET', 'BUILD_SLICE',
'UNPACK_SEQUENCE',
'MAKE_FUNCTION', 'CALL_FUNCTION', 'MAKE_CLOSURE',
'CALL_FUNCTION_VAR', 'CALL_FUNCTION_KW',
'CALL_FUNCTION_VAR_KW', 'RAISE_VARARGS'
):
# CALL_FUNCTION OP renaming is done as a custom rule in parse3
if op_name not in ('CALL_FUNCTION', 'CALL_FUNCTION_VAR',
'CALL_FUNCTION_VAR_KW', 'CALL_FUNCTION_KW',
):
op_name = '%s_%d' % (op_name, oparg)
if op_name != 'BUILD_SLICE':
customize[op_name] = oparg
elif op_name == 'JUMP_ABSOLUTE':
target = self.get_target(offset)
if target < offset:
if (offset in self.stmts
and self.code[offset+3] not in (END_FINALLY, POP_BLOCK)
and offset not in self.not_continue):
op_name = 'CONTINUE'
else:
op_name = 'JUMP_BACK'
pass
pass
pass
elif op_name == 'JUMP_FORWARD':
# Python 3.5 will optimize out a JUMP_FORWARD to the
# next instruction while Python 3.2 won't. Smplify
# grammar rules working with both 3.2 and 3.5,
# by optimizing the way Python 3.5 does it.
#
# We may however want to consider whether we do
# this in 3.5 or not.
if oparg == 0 and self.version >= 3.5:
tokens.append(Token('NOP', oparg, pattr, offset, linestart))
continue
elif op_name == 'LOAD_GLOBAL':
if offset in self.load_asserts:
op_name = 'LOAD_ASSERT'
if offset in self.linestarts:
linestart = self.linestarts[offset]
else:
linestart = None
if offset not in replace:
tokens.append(Token(op_name, oparg, pattr, offset, linestart))
else:
tokens.append(Token(replace[offset], oparg, pattr, offset, linestart))
pass
# debug:
# for t in tokens:
# print(t)
return tokens, customize
def build_lines_data(self, code_obj): def build_lines_data(self, code_obj):
""" """
Generate various line-related helper data. Generate various line-related helper data.
@@ -905,7 +695,7 @@ if __name__ == "__main__":
import inspect import inspect
co = inspect.currentframe().f_code co = inspect.currentframe().f_code
from uncompyle6 import PYTHON_VERSION from uncompyle6 import PYTHON_VERSION
tokens, customize = Scanner3(PYTHON_VERSION).disassemble3(co) tokens, customize = Scanner3(PYTHON_VERSION).disassemble(co)
for t in tokens: for t in tokens:
print(t.format()) print(t.format())
else: else: