mirror of
https://github.com/rocky/python-uncompyle6.git
synced 2025-08-02 16:44:46 +08:00
Start to DRY Python 2 scanners...
Get 2.7 opcodes from xdis.
@@ -37,7 +37,7 @@ entry_points={
 ]}
 ftp_url = None
 install_requires = ['spark-parser >= 1.2.1',
-                    'xdis >= 1.0.2']
+                    'xdis >= 1.0.4']
 license = 'MIT'
 mailing_list = 'python-debugger@googlegroups.com'
 modname = 'uncompyle6'
@@ -1,219 +0,0 @@
|
||||
"""
|
||||
CPython 2.7 bytecode opcodes
|
||||
|
||||
This is used in scanner (bytecode disassembly) and parser (Python grammar).
|
||||
|
||||
This is a superset of Python 3.4's opcode.py with some opcodes that simplify
|
||||
parsing and semantic interpretation.
|
||||
"""
|
||||
|
||||
# FIXME: DRY this along the lines of opcode_3x.
|
||||
|
||||
cmp_op = ('<', '<=', '==', '!=', '>', '>=', 'in', 'not in', 'is',
|
||||
'is not', 'exception match', 'BAD')
|
||||
|
||||
hasconst = []
|
||||
hasname = []
|
||||
hasjrel = []
|
||||
hasjabs = []
|
||||
haslocal = []
|
||||
hascompare = []
|
||||
hasfree = []
|
||||
hasArgumentExtended = []
|
||||
PJIF = PJIT = JA = JF = 0
|
||||
|
||||
opmap = {}
|
||||
opname = [''] * 256
|
||||
for op in range(256): opname[op] = '<%r>' % (op,)
|
||||
del op
|
||||
|
||||
def def_op(name, op):
|
||||
opname[op] = name
|
||||
opmap[name] = op
|
||||
globals().update({name: op})
|
||||
|
||||
def name_op(name, op):
|
||||
def_op(name, op)
|
||||
hasname.append(op)
|
||||
|
||||
def jrel_op(name, op):
|
||||
def_op(name, op)
|
||||
hasjrel.append(op)
|
||||
|
||||
def jabs_op(name, op):
|
||||
def_op(name, op)
|
||||
hasjabs.append(op)
|
||||
|
||||
def updateGlobal():
|
||||
globals().update({'PJIF': opmap['POP_JUMP_IF_FALSE']})
|
||||
globals().update({'PJIT': opmap['POP_JUMP_IF_TRUE']})
|
||||
globals().update({'JA': opmap['JUMP_ABSOLUTE']})
|
||||
globals().update({'JF': opmap['JUMP_FORWARD']})
|
||||
globals().update(dict([(k.replace('+', '_'), v) for (k, v) in opmap.items()]))
|
||||
globals().update({'JUMP_OPs': map(lambda op: opname[op], hasjrel + hasjabs)})
|
||||
|
||||
# Instruction opcodes for compiled code
|
||||
# Blank lines correspond to available opcodes
|
||||
|
||||
def_op('STOP_CODE', 0)
|
||||
def_op('POP_TOP', 1)
|
||||
def_op('ROT_TWO', 2)
|
||||
def_op('ROT_THREE', 3)
|
||||
def_op('DUP_TOP', 4)
|
||||
def_op('ROT_FOUR', 5)
|
||||
|
||||
def_op('NOP', 9)
|
||||
def_op('UNARY_POSITIVE', 10)
|
||||
def_op('UNARY_NEGATIVE', 11)
|
||||
def_op('UNARY_NOT', 12)
|
||||
def_op('UNARY_CONVERT', 13)
|
||||
|
||||
def_op('UNARY_INVERT', 15)
|
||||
|
||||
def_op('BINARY_POWER', 19)
|
||||
def_op('BINARY_MULTIPLY', 20)
|
||||
def_op('BINARY_DIVIDE', 21)
|
||||
def_op('BINARY_MODULO', 22)
|
||||
def_op('BINARY_ADD', 23)
|
||||
def_op('BINARY_SUBTRACT', 24)
|
||||
def_op('BINARY_SUBSCR', 25)
|
||||
def_op('BINARY_FLOOR_DIVIDE', 26)
|
||||
def_op('BINARY_TRUE_DIVIDE', 27)
|
||||
def_op('INPLACE_FLOOR_DIVIDE', 28)
|
||||
def_op('INPLACE_TRUE_DIVIDE', 29)
|
||||
def_op('SLICE+0', 30)
|
||||
def_op('SLICE+1', 31)
|
||||
def_op('SLICE+2', 32)
|
||||
def_op('SLICE+3', 33)
|
||||
|
||||
def_op('STORE_SLICE+0', 40)
|
||||
def_op('STORE_SLICE+1', 41)
|
||||
def_op('STORE_SLICE+2', 42)
|
||||
def_op('STORE_SLICE+3', 43)
|
||||
|
||||
def_op('DELETE_SLICE+0', 50)
|
||||
def_op('DELETE_SLICE+1', 51)
|
||||
def_op('DELETE_SLICE+2', 52)
|
||||
def_op('DELETE_SLICE+3', 53)
|
||||
|
||||
def_op('STORE_MAP', 54)
|
||||
def_op('INPLACE_ADD', 55)
|
||||
def_op('INPLACE_SUBTRACT', 56)
|
||||
def_op('INPLACE_MULTIPLY', 57)
|
||||
def_op('INPLACE_DIVIDE', 58)
|
||||
def_op('INPLACE_MODULO', 59)
|
||||
def_op('STORE_SUBSCR', 60)
|
||||
def_op('DELETE_SUBSCR', 61)
|
||||
def_op('BINARY_LSHIFT', 62)
|
||||
def_op('BINARY_RSHIFT', 63)
|
||||
def_op('BINARY_AND', 64)
|
||||
def_op('BINARY_XOR', 65)
|
||||
def_op('BINARY_OR', 66)
|
||||
def_op('INPLACE_POWER', 67)
|
||||
def_op('GET_ITER', 68)
|
||||
|
||||
def_op('PRINT_EXPR', 70)
|
||||
def_op('PRINT_ITEM', 71)
|
||||
def_op('PRINT_NEWLINE', 72)
|
||||
def_op('PRINT_ITEM_TO', 73)
|
||||
def_op('PRINT_NEWLINE_TO', 74)
|
||||
def_op('INPLACE_LSHIFT', 75)
|
||||
def_op('INPLACE_RSHIFT', 76)
|
||||
def_op('INPLACE_AND', 77)
|
||||
def_op('INPLACE_XOR', 78)
|
||||
def_op('INPLACE_OR', 79)
|
||||
def_op('BREAK_LOOP', 80)
|
||||
def_op('WITH_CLEANUP', 81)
|
||||
def_op('LOAD_LOCALS', 82)
|
||||
def_op('RETURN_VALUE', 83)
|
||||
def_op('IMPORT_STAR', 84)
|
||||
def_op('EXEC_STMT', 85)
|
||||
def_op('YIELD_VALUE', 86)
|
||||
def_op('POP_BLOCK', 87)
|
||||
def_op('END_FINALLY', 88)
|
||||
def_op('BUILD_CLASS', 89)
|
||||
|
||||
HAVE_ARGUMENT = 90 # Opcodes from here have an argument:
|
||||
|
||||
name_op('STORE_NAME', 90) # Index in name list
|
||||
name_op('DELETE_NAME', 91) # ""
|
||||
def_op('UNPACK_SEQUENCE', 92) # Number of tuple items
|
||||
jrel_op('FOR_ITER', 93)
|
||||
def_op('LIST_APPEND', 94)
|
||||
|
||||
name_op('STORE_ATTR', 95) # Index in name list
|
||||
name_op('DELETE_ATTR', 96) # ""
|
||||
name_op('STORE_GLOBAL', 97) # ""
|
||||
name_op('DELETE_GLOBAL', 98) # ""
|
||||
def_op('DUP_TOPX', 99) # number of items to duplicate
|
||||
def_op('LOAD_CONST', 100) # Index in const list
|
||||
hasconst.append(100)
|
||||
name_op('LOAD_NAME', 101) # Index in name list
|
||||
def_op('BUILD_TUPLE', 102) # Number of tuple items
|
||||
def_op('BUILD_LIST', 103) # Number of list items
|
||||
def_op('BUILD_SET', 104) # Number of set items
|
||||
def_op('BUILD_MAP', 105) # Number of dict entries (upto 255)
|
||||
name_op('LOAD_ATTR', 106) # Index in name list
|
||||
def_op('COMPARE_OP', 107) # Comparison operator
|
||||
hascompare.append(107)
|
||||
name_op('IMPORT_NAME', 108) # Index in name list
|
||||
name_op('IMPORT_FROM', 109) # Index in name list
|
||||
jrel_op('JUMP_FORWARD', 110) # Number of bytes to skip
|
||||
jabs_op('JUMP_IF_FALSE_OR_POP', 111) # Target byte offset from beginning of code
|
||||
jabs_op('JUMP_IF_TRUE_OR_POP', 112) # ""
|
||||
jabs_op('JUMP_ABSOLUTE', 113) # ""
|
||||
jabs_op('POP_JUMP_IF_FALSE', 114) # ""
|
||||
jabs_op('POP_JUMP_IF_TRUE', 115) # ""
|
||||
|
||||
name_op('LOAD_GLOBAL', 116) # Index in name list
|
||||
|
||||
jabs_op('CONTINUE_LOOP', 119) # Target address
|
||||
jrel_op('SETUP_LOOP', 120) # Distance to target address
|
||||
jrel_op('SETUP_EXCEPT', 121) # ""
|
||||
jrel_op('SETUP_FINALLY', 122) # ""
|
||||
|
||||
def_op('LOAD_FAST', 124) # Local variable number
|
||||
haslocal.append(124)
|
||||
def_op('STORE_FAST', 125) # Local variable number
|
||||
haslocal.append(125)
|
||||
def_op('DELETE_FAST', 126) # Local variable number
|
||||
haslocal.append(126)
|
||||
|
||||
def_op('RAISE_VARARGS', 130) # Number of raise arguments (1, 2, or 3)
|
||||
def_op('CALL_FUNCTION', 131) # #args + (#kwargs << 8)
|
||||
def_op('MAKE_FUNCTION', 132) # Number of args with default values
|
||||
def_op('BUILD_SLICE', 133) # Number of items
|
||||
def_op('MAKE_CLOSURE', 134)
|
||||
def_op('LOAD_CLOSURE', 135)
|
||||
hasfree.append(135)
|
||||
def_op('LOAD_DEREF', 136)
|
||||
hasfree.append(136)
|
||||
def_op('STORE_DEREF', 137)
|
||||
hasfree.append(137)
|
||||
|
||||
def_op('CALL_FUNCTION_VAR', 140) # #args + (#kwargs << 8)
|
||||
def_op('CALL_FUNCTION_KW', 141) # #args + (#kwargs << 8)
|
||||
def_op('CALL_FUNCTION_VAR_KW', 142) # #args + (#kwargs << 8)
|
||||
|
||||
jrel_op('SETUP_WITH', 143)
|
||||
|
||||
def_op('EXTENDED_ARG', 145)
|
||||
EXTENDED_ARG = 145
|
||||
def_op('SET_ADD', 146)
|
||||
def_op('MAP_ADD', 147)
|
||||
|
||||
# PyPy magic opcodes
|
||||
# FIXME: see if we can conditionally add them
|
||||
def_op('LOOKUP_METHOD', 201)
|
||||
def_op('CALL_METHOD', 202)
|
||||
def_op('BUILD_LIST_FROM_ARG', 203)
|
||||
def_op('JUMP_IF_NOT_DEBUG', 204)
|
||||
|
||||
updateGlobal()
|
||||
del def_op, name_op, jrel_op, jabs_op
|
||||
|
||||
from uncompyle6 import PYTHON_VERSION
|
||||
if PYTHON_VERSION == 2.7:
|
||||
import dis
|
||||
# print(set(dis.opmap.items()) - set(opmap.items()))
|
||||
assert all(item in opmap.items() for item in dis.opmap.items())
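These tables (opmap, opname and the has* membership lists) are exactly what the scanner consults on every instruction; with this commit they come from xdis.opcodes.opcode_27 instead of this module. A minimal standalone sketch of that style of lookup, with a few of the CPython 2.7 values hard-coded for illustration:

opmap = {'POP_TOP': 1, 'LOAD_CONST': 100, 'JUMP_ABSOLUTE': 113}
opname = dict((v, k) for (k, v) in opmap.items())
hasjabs = [113]
HAVE_ARGUMENT = 90

op = 113
print(opname[op])             # -> JUMP_ABSOLUTE
print(op in hasjabs)          # -> True: argument is an absolute byte offset
print(op >= HAVE_ARGUMENT)    # -> True: the instruction is 3 bytes long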
|
@@ -30,9 +30,10 @@ if PYTHON3:
 else:
     L65536 = long(65536) # NOQA

-from uncompyle6.opcodes import (opcode_25, opcode_26, opcode_27)
+from uncompyle6.opcodes import (opcode_25, opcode_26)

-from xdis.opcodes import (opcode_32, opcode_33, opcode_34, opcode_35)
+from xdis.opcodes import (opcode_27,
+                          opcode_32, opcode_33, opcode_34, opcode_35)


 class Code(object):
|
||||
@@ -212,16 +213,6 @@ class Scanner(object):
|
||||
result.append(offset)
|
||||
return result
|
||||
|
||||
def op_size(self, op):
|
||||
"""
|
||||
Return size of operator with its arguments
|
||||
for given opcode <op>.
|
||||
"""
|
||||
if op < self.opc.HAVE_ARGUMENT and op not in self.opc.hasArgumentExtended:
|
||||
return 1
|
||||
else:
|
||||
return 3
|
||||
|
||||
def op_hasArgument(self, op):
|
||||
return self.op_size(op) > 1
|
||||
|
||||
|
772
uncompyle6/scanners/scanner2.py
Executable file
@@ -0,0 +1,772 @@
|
||||
# Copyright (c) 2015, 2016 by Rocky Bernstein
|
||||
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
|
||||
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
|
||||
"""
|
||||
Python 2 Generic bytecode scanner/deparser
|
||||
|
||||
This overlaps Python's dis module, but it can be run from Python versions
other than the version running this code. Notably, it can be run from
Python version 2.
|
||||
|
||||
Also we *modify* the instruction sequence to assist deparsing code.
|
||||
For example:
|
||||
- we add "COME_FROM" instructions to help in figuring out
|
||||
conditional branching and looping.
|
||||
- LOAD_CONSTs are classified further according to the type of thing
they load:
lambdas, genexprs, and {dict,set,list} comprehensions
- parameter counts are appended to {CALL,MAKE}_FUNCTION and BUILD_{TUPLE,SET,SLICE}
|
||||
|
||||
Finally we save token information.
|
||||
"""
|
||||
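A schematic sketch of the transformations listed above, showing token names only (the real scanner emits Token objects that also carry offsets, arguments and line numbers):

plain = ['LOAD_CONST', 'MAKE_FUNCTION', 'CALL_FUNCTION', 'JUMP_ABSOLUTE']
transformed = ['LOAD_LAMBDA',      # LOAD_CONST whose constant is a <lambda> code object
               'MAKE_FUNCTION_0',  # argument count appended to the opcode name
               'CALL_FUNCTION_1',
               'JUMP_BACK',        # a backward JUMP_ABSOLUTE is renamed
               'COME_FROM']        # pseudo-instruction inserted at a jump target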
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
import dis, inspect
|
||||
from collections import namedtuple
|
||||
from array import array
|
||||
|
||||
from xdis.code import iscode
|
||||
|
||||
# FIXME: remove
|
||||
from xdis.opcodes.opcode_27 import * # NOQA
|
||||
|
||||
import uncompyle6.scanner as scan
|
||||
|
||||
class Scanner2(scan.Scanner):
|
||||
def __init__(self, version):
|
||||
scan.Scanner.__init__(self, version)
|
||||
|
||||
def disassemble(self, co, classname=None, code_objects={}):
|
||||
"""
|
||||
Disassemble a Python 2 code object, returning a list of 'Token'.
|
||||
Various transformations are made to assist the deparsing grammar.
|
||||
For example:
|
||||
- various types of LOAD_CONST's are categorized in terms of what they load
|
||||
- COME_FROM instructions are added to assist parsing control structures
|
||||
- MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments
|
||||
The main part of this procedure is modelled after
|
||||
dis.disassemble().
|
||||
"""
|
||||
|
||||
# import dis; dis.disassemble(co) # DEBUG
|
||||
|
||||
# Container for tokens
|
||||
tokens = []
|
||||
|
||||
customize = {}
|
||||
Token = self.Token # shortcut
|
||||
|
||||
n = self.setup_code(co)
|
||||
self.build_lines_data(co, n)
|
||||
self.build_prev_op(n)
|
||||
|
||||
# self.lines contains (block,addrLastInstr)
|
||||
if classname:
|
||||
classname = '_' + classname.lstrip('_') + '__'
|
||||
|
||||
def unmangle(name):
|
||||
if name.startswith(classname) and name[-2:] != '__':
|
||||
return name[len(classname) - 2:]
|
||||
return name
|
||||
|
||||
free = [ unmangle(name) for name in (co.co_cellvars + co.co_freevars) ]
|
||||
names = [ unmangle(name) for name in co.co_names ]
|
||||
varnames = [ unmangle(name) for name in co.co_varnames ]
|
||||
else:
|
||||
free = co.co_cellvars + co.co_freevars
|
||||
names = co.co_names
|
||||
varnames = co.co_varnames
|
||||
|
||||
self.load_asserts = set()
|
||||
for i in self.op_range(0, n):
|
||||
if self.code[i] == self.opc.PJIT and self.code[i+3] == self.opc.LOAD_GLOBAL:
|
||||
if names[self.get_argument(i+3)] == 'AssertionError':
|
||||
self.load_asserts.add(i+3)
|
||||
|
||||
cf = self.find_jump_targets()
|
||||
# contains (code, [addrRefToCode])
|
||||
last_stmt = self.next_stmt[0]
|
||||
i = self.next_stmt[last_stmt]
|
||||
replace = {}
|
||||
while i < n-1:
|
||||
if self.lines[last_stmt].next > i:
|
||||
if self.code[last_stmt] == self.opc.PRINT_ITEM:
|
||||
if self.code[i] == self.opc.PRINT_ITEM:
|
||||
replace[i] = 'PRINT_ITEM_CONT'
|
||||
elif self.code[i] == self.opc.PRINT_NEWLINE:
|
||||
replace[i] = 'PRINT_NEWLINE_CONT'
|
||||
last_stmt = i
|
||||
i = self.next_stmt[i]
|
||||
|
||||
imports = self.all_instr(0, n, (self.opc.IMPORT_NAME, self.opc.IMPORT_FROM,
|
||||
self.opc.IMPORT_STAR))
|
||||
if len(imports) > 1:
|
||||
last_import = imports[0]
|
||||
for i in imports[1:]:
|
||||
if self.lines[last_import].next > i:
|
||||
if self.code[last_import] == self.opc.IMPORT_NAME == self.code[i]:
|
||||
replace[i] = 'IMPORT_NAME_CONT'
|
||||
last_import = i
|
||||
|
||||
extended_arg = 0
|
||||
for offset in self.op_range(0, n):
|
||||
if offset in cf:
|
||||
k = 0
|
||||
for j in cf[offset]:
|
||||
tokens.append(Token('COME_FROM', None, repr(j),
|
||||
offset="%s_%d" % (offset, k)))
|
||||
k += 1
|
||||
|
||||
op = self.code[offset]
|
||||
op_name = self.opc.opname[op]
|
||||
|
||||
oparg = None; pattr = None
|
||||
if op >= self.opc.HAVE_ARGUMENT:
|
||||
oparg = self.get_argument(offset) + extended_arg
|
||||
extended_arg = 0
|
||||
if op == self.opc.EXTENDED_ARG:
|
||||
extended_arg = oparg * scan.L65536
|
||||
continue
|
||||
if op in self.opc.hasconst:
|
||||
const = co.co_consts[oparg]
|
||||
if iscode(const):
|
||||
oparg = const
|
||||
if const.co_name == '<lambda>':
|
||||
assert op_name == 'LOAD_CONST'
|
||||
op_name = 'LOAD_LAMBDA'
|
||||
elif const.co_name == '<genexpr>':
|
||||
op_name = 'LOAD_GENEXPR'
|
||||
elif const.co_name == '<dictcomp>':
|
||||
op_name = 'LOAD_DICTCOMP'
|
||||
elif const.co_name == '<setcomp>':
|
||||
op_name = 'LOAD_SETCOMP'
|
||||
# verify() uses 'pattr' for comparison, since 'attr'
|
||||
# now holds Code(const) and thus can not be used
|
||||
# for comparison (todo: think about changing this)
|
||||
# pattr = 'code_object @ 0x%x %s->%s' %\
|
||||
# (id(const), const.co_filename, const.co_name)
|
||||
pattr = '<code_object ' + const.co_name + '>'
|
||||
else:
|
||||
pattr = const
|
||||
elif op in self.opc.hasname:
|
||||
pattr = names[oparg]
|
||||
elif op in self.opc.hasjrel:
|
||||
pattr = repr(offset + 3 + oparg)
|
||||
elif op in self.opc.hasjabs:
|
||||
pattr = repr(oparg)
|
||||
elif op in self.opc.haslocal:
|
||||
pattr = varnames[oparg]
|
||||
elif op in self.opc.hascompare:
|
||||
pattr = self.opc.cmp_op[oparg]
|
||||
elif op in self.opc.hasfree:
|
||||
pattr = free[oparg]
|
||||
|
||||
if op in (BUILD_LIST, BUILD_TUPLE, BUILD_SET, BUILD_SLICE,
|
||||
UNPACK_SEQUENCE,
|
||||
MAKE_FUNCTION, CALL_FUNCTION, MAKE_CLOSURE,
|
||||
CALL_FUNCTION_VAR, CALL_FUNCTION_KW,
|
||||
CALL_FUNCTION_VAR_KW, DUP_TOPX, RAISE_VARARGS
|
||||
):
|
||||
# CE - Hack for >= 2.5
|
||||
# Now all values loaded via LOAD_CLOSURE are packed into
|
||||
# a tuple before calling MAKE_CLOSURE.
|
||||
if op == BUILD_TUPLE and \
|
||||
self.code[self.prev[offset]] == LOAD_CLOSURE:
|
||||
continue
|
||||
else:
|
||||
op_name = '%s_%d' % (op_name, oparg)
|
||||
if op != BUILD_SLICE:
|
||||
customize[op_name] = oparg
|
||||
elif op == JA:
|
||||
target = self.get_target(offset)
|
||||
if target < offset:
|
||||
if offset in self.stmts and self.code[offset+3] not in (END_FINALLY, POP_BLOCK) \
|
||||
and offset not in self.not_continue:
|
||||
op_name = 'CONTINUE'
|
||||
else:
|
||||
op_name = 'JUMP_BACK'
|
||||
|
||||
elif op == LOAD_GLOBAL:
|
||||
if offset in self.load_asserts:
|
||||
op_name = 'LOAD_ASSERT'
|
||||
elif op == RETURN_VALUE:
|
||||
if offset in self.return_end_ifs:
|
||||
op_name = 'RETURN_END_IF'
|
||||
|
||||
if offset in self.linestartoffsets:
|
||||
linestart = self.linestartoffsets[offset]
|
||||
else:
|
||||
linestart = None
|
||||
|
||||
if offset not in replace:
|
||||
tokens.append(Token(op_name, oparg, pattr, offset, linestart))
|
||||
else:
|
||||
tokens.append(Token(replace[offset], oparg, pattr, offset, linestart))
|
||||
return tokens, customize
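A hedged usage sketch of the method above, mirroring the __main__ demo at the end of this file (it assumes a Python 2.7 interpreter, so that the current frame's code object holds 2.7 bytecode):

import inspect

def show_tokens():
    co = inspect.currentframe().f_code
    tokens, customize = Scanner2(2.7).disassemble(co)
    for t in tokens:
        print(t.format())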
|
||||
|
||||
def disassemble_native(self, co, classname=None, code_objects={}):
|
||||
"""
|
||||
Like disassemble() but doesn't try to adjust any opcodes.
|
||||
"""
|
||||
|
||||
# Container for tokens
|
||||
tokens = []
|
||||
|
||||
customize = {}
|
||||
Token = self.Token # shortcut
|
||||
|
||||
n = self.setup_code(co)
|
||||
self.build_lines_data(co, n)
|
||||
|
||||
# self.lines contains (block,addrLastInstr)
|
||||
if classname:
|
||||
classname = '_' + classname.lstrip('_') + '__'
|
||||
|
||||
def unmangle(name):
|
||||
if name.startswith(classname) and name[-2:] != '__':
|
||||
return name[len(classname) - 2:]
|
||||
return name
|
||||
|
||||
free = [ unmangle(name) for name in (co.co_cellvars + co.co_freevars) ]
|
||||
names = [ unmangle(name) for name in co.co_names ]
|
||||
varnames = [ unmangle(name) for name in co.co_varnames ]
|
||||
else:
|
||||
free = co.co_cellvars + co.co_freevars
|
||||
names = co.co_names
|
||||
varnames = co.co_varnames
|
||||
|
||||
extended_arg = 0
|
||||
for offset in self.op_range(0, n):
|
||||
op = self.code[offset]
|
||||
op_name = self.opc.opname[op]
|
||||
|
||||
oparg = None; pattr = None
|
||||
if op >= HAVE_ARGUMENT:
|
||||
oparg = self.get_argument(offset) + extended_arg
|
||||
extended_arg = 0
|
||||
if op == EXTENDED_ARG:
|
||||
extended_arg = oparg * scan.L65536
|
||||
continue
|
||||
if op in hasconst:
|
||||
pattr = co.co_consts[oparg]
|
||||
elif op in hasname:
|
||||
pattr = names[oparg]
|
||||
elif op in hasjrel:
|
||||
pattr = repr(offset + 3 + oparg)
|
||||
elif op in hasjabs:
|
||||
pattr = repr(oparg)
|
||||
elif op in haslocal:
|
||||
pattr = varnames[oparg]
|
||||
elif op in hascompare:
|
||||
pattr = cmp_op[oparg]
|
||||
elif op in hasfree:
|
||||
pattr = free[oparg]
|
||||
|
||||
if offset in self.linestartoffsets:
|
||||
linestart = self.linestartoffsets[offset]
|
||||
else:
|
||||
linestart = None
|
||||
|
||||
tokens.append(Token(op_name, oparg, pattr, offset, linestart))
|
||||
pass
|
||||
return tokens, customize
|
||||
|
||||
def op_size(self, op):
|
||||
"""
|
||||
Return size of operator with its arguments
|
||||
for given opcode <op>.
|
||||
"""
|
||||
if op < self.opc.HAVE_ARGUMENT and op not in self.opc.hasArgumentExtended:
|
||||
return 1
|
||||
else:
|
||||
return 3
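Since hasArgumentExtended is empty for 2.7, op_size() reduces to the fixed widths of Python 2 instructions; a standalone sketch with the relevant constants spelled out:

HAVE_ARGUMENT = 90                 # first opcode that carries a 2-byte argument

def op_size(op):
    return 1 if op < HAVE_ARGUMENT else 3

assert op_size(1) == 1             # POP_TOP: opcode byte only
assert op_size(100) == 3           # LOAD_CONST: opcode byte + 2 argument bytes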
|
||||
|
||||
def setup_code(self, co):
|
||||
"""
|
||||
Creates Python-independent bytecode structure (byte array) in
|
||||
self.code and records previous instruction in self.prev
|
||||
The size of self.code is returned
|
||||
"""
|
||||
self.code = array('B', co.co_code)
|
||||
|
||||
n = -1
|
||||
for i in self.op_range(0, len(self.code)):
|
||||
if self.code[i] in (self.opc.RETURN_VALUE, self.opc.END_FINALLY):
|
||||
n = i + 1
|
||||
pass
|
||||
pass
|
||||
assert n > -1, "Didn't find RETURN_VALUE or END_FINALLY"
|
||||
self.code = array('B', co.co_code[:n])
|
||||
|
||||
return n
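A standalone sketch of the truncation setup_code() performs, here simplified to a flat byte scan (the method itself walks instruction boundaries via op_range): everything after the last RETURN_VALUE (83) or END_FINALLY (88) is dropped.

from array import array

RETURN_VALUE, END_FINALLY = 83, 88
code = array('B', [100, 0, 0, 83, 9, 9])   # LOAD_CONST 0; RETURN_VALUE; trailing junk
n = max(i + 1 for i in range(len(code)) if code[i] in (RETURN_VALUE, END_FINALLY))
code = code[:n]
assert n == 4 and list(code) == [100, 0, 0, 83]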
|
||||
|
||||
def build_prev_op(self, n):
|
||||
self.prev = [0]
|
||||
# mapping addresses of instruction & argument
|
||||
for i in self.op_range(0, n):
|
||||
op = self.code[i]
|
||||
self.prev.append(i)
|
||||
if op >= HAVE_ARGUMENT:
|
||||
self.prev.append(i)
|
||||
self.prev.append(i)
|
||||
pass
|
||||
pass
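A sketch of the self.prev table this builds: prev[x] gives the offset of the instruction that precedes position x, so following prev from an instruction's offset steps backwards through the bytecode one instruction at a time.

HAVE_ARGUMENT = 90
code = [100, 0, 0, 83]          # LOAD_CONST (3 bytes) then RETURN_VALUE (1 byte)
prev, i = [0], 0
while i < len(code):
    prev.append(i)
    width = 3 if code[i] >= HAVE_ARGUMENT else 1
    prev.extend([i] * (width - 1))
    i += width
assert prev == [0, 0, 0, 0, 3]  # prev[3] == 0: the instruction before offset 3 starts at 0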
|
||||
|
||||
def build_lines_data(self, co, n):
|
||||
"""
|
||||
Initializes self.lines and self.linestartoffsets
|
||||
"""
|
||||
self.lines = []
|
||||
linetuple = namedtuple('linetuple', ['l_no', 'next'])
|
||||
|
||||
# linestarts is a list of (offset, line number) pairs.
# Turn that into a hash (dict) that we can index.
|
||||
linestarts = list(dis.findlinestarts(co))
|
||||
self.linestartoffsets = {}
|
||||
for offset, lineno in linestarts:
|
||||
self.linestartoffsets[offset] = lineno
|
||||
|
||||
j = 0
|
||||
(prev_start_byte, prev_line_no) = linestarts[0]
|
||||
for (start_byte, line_no) in linestarts[1:]:
|
||||
while j < start_byte:
|
||||
self.lines.append(linetuple(prev_line_no, start_byte))
|
||||
j += 1
|
||||
prev_line_no = start_byte
|
||||
while j < n:
|
||||
self.lines.append(linetuple(prev_line_no, n))
|
||||
j+=1
|
||||
return
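dis.findlinestarts() is the CPython helper this leans on; a small sketch of the (offset, line number) pairs it yields and the offset-to-line dict built from them (toy function for illustration):

import dis

def toy(x):
    y = x + 1
    return y

linestarts = list(dis.findlinestarts(toy.__code__))
linestartoffsets = dict(linestarts)     # bytecode offset -> source line number
assert linestarts[0][0] == 0            # the first body line starts at offset 0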
|
||||
|
||||
def build_stmt_indices(self):
|
||||
code = self.code
|
||||
start = 0
|
||||
end = len(code)
|
||||
|
||||
stmt_opcodes = set([
|
||||
self.opc.SETUP_LOOP, self.opc.BREAK_LOOP, self.opc.CONTINUE_LOOP,
|
||||
self.opc.SETUP_FINALLY, self.opc.END_FINALLY,
|
||||
self.opc.SETUP_EXCEPT, self.opc.SETUP_WITH,
|
||||
self.opc.POP_BLOCK, self.opc.STORE_FAST, self.opc.DELETE_FAST,
|
||||
self.opc.STORE_DEREF, self.opc.STORE_GLOBAL,
|
||||
self.opc.DELETE_GLOBAL, self.opc.STORE_NAME,
|
||||
self.opc.DELETE_NAME, self.opc.STORE_ATTR,
|
||||
self.opc.DELETE_ATTR,
|
||||
## FIXME keep going.
|
||||
STORE_SUBSCR, DELETE_SUBSCR,
|
||||
RETURN_VALUE, RAISE_VARARGS, POP_TOP,
|
||||
PRINT_EXPR, PRINT_ITEM, PRINT_NEWLINE, PRINT_ITEM_TO, PRINT_NEWLINE_TO,
|
||||
STORE_SLICE_0, STORE_SLICE_1, STORE_SLICE_2, STORE_SLICE_3,
|
||||
DELETE_SLICE_0, DELETE_SLICE_1, DELETE_SLICE_2, DELETE_SLICE_3,
|
||||
JUMP_ABSOLUTE, EXEC_STMT,
|
||||
])
|
||||
|
||||
stmt_opcode_seqs = [(self.opc.PJIF, self.opc.JF),
|
||||
(self.opc.PJIF, self.opc.JA),
|
||||
(self.opc.PJIT, self.opc.JF),
|
||||
(self.opc.PJIT, self.opc.JA)]
|
||||
|
||||
designator_ops = set([
|
||||
self.opc.STORE_FAST, STORE_NAME, STORE_GLOBAL, STORE_DEREF, STORE_ATTR,
|
||||
STORE_SLICE_0, STORE_SLICE_1, STORE_SLICE_2, STORE_SLICE_3,
|
||||
STORE_SUBSCR, UNPACK_SEQUENCE, JA
|
||||
])
|
||||
|
||||
prelim = self.all_instr(start, end, stmt_opcodes)
|
||||
|
||||
stmts = self.stmts = set(prelim)
|
||||
pass_stmts = set()
|
||||
for seq in stmt_opcode_seqs:
|
||||
for i in self.op_range(start, end-(len(seq)+1)):
|
||||
match = True
|
||||
for elem in seq:
|
||||
if elem != code[i]:
|
||||
match = False
|
||||
break
|
||||
i += self.op_size(code[i])
|
||||
|
||||
if match:
|
||||
i = self.prev[i]
|
||||
stmts.add(i)
|
||||
pass_stmts.add(i)
|
||||
|
||||
if pass_stmts:
|
||||
stmt_list = list(stmts)
|
||||
stmt_list.sort()
|
||||
else:
|
||||
stmt_list = prelim
|
||||
last_stmt = -1
|
||||
self.next_stmt = []
|
||||
slist = self.next_stmt = []
|
||||
i = 0
|
||||
for s in stmt_list:
|
||||
if code[s] == JA and s not in pass_stmts:
|
||||
target = self.get_target(s)
|
||||
if target > s or self.lines[last_stmt].l_no == self.lines[s].l_no:
|
||||
stmts.remove(s)
|
||||
continue
|
||||
j = self.prev[s]
|
||||
while code[j] == JA:
|
||||
j = self.prev[j]
|
||||
if code[j] == self.opc.LIST_APPEND: # list comprehension
|
||||
stmts.remove(s)
|
||||
continue
|
||||
elif code[s] == self.opc.POP_TOP and code[self.prev[s]] == self.opc.ROT_TWO:
|
||||
stmts.remove(s)
|
||||
continue
|
||||
elif code[s] in designator_ops:
|
||||
j = self.prev[s]
|
||||
while code[j] in designator_ops:
|
||||
j = self.prev[j]
|
||||
if code[j] == self.opc.FOR_ITER:
|
||||
stmts.remove(s)
|
||||
continue
|
||||
last_stmt = s
|
||||
slist += [s] * (s-i)
|
||||
i = s
|
||||
slist += [end] * (end-len(slist))
|
||||
|
||||
def next_except_jump(self, start):
|
||||
'''
|
||||
Return the next jump that was generated by an except SomeException:
|
||||
construct in a try...except...else clause or None if not found.
|
||||
'''
|
||||
|
||||
if self.code[start] == DUP_TOP:
|
||||
except_match = self.first_instr(start, len(self.code), POP_JUMP_IF_FALSE)
|
||||
if except_match:
|
||||
jmp = self.prev[self.get_target(except_match)]
|
||||
self.ignore_if.add(except_match)
|
||||
self.not_continue.add(jmp)
|
||||
return jmp
|
||||
|
||||
count_END_FINALLY = 0
|
||||
count_SETUP_ = 0
|
||||
for i in self.op_range(start, len(self.code)):
|
||||
op = self.code[i]
|
||||
if op == END_FINALLY:
|
||||
if count_END_FINALLY == count_SETUP_:
|
||||
assert self.code[self.prev[i]] in (JA, JF, RETURN_VALUE)
|
||||
self.not_continue.add(self.prev[i])
|
||||
return self.prev[i]
|
||||
count_END_FINALLY += 1
|
||||
elif op in (SETUP_EXCEPT, SETUP_WITH, SETUP_FINALLY):
|
||||
count_SETUP_ += 1
|
||||
|
||||
def detect_structure(self, pos, op=None):
|
||||
'''
|
||||
Detect the type of block structures and their boundaries, to fix optimized
jumps in Python 2.3+
|
||||
'''
|
||||
|
||||
# TODO: check the struct boundaries more precisely -Dan
|
||||
|
||||
code = self.code
|
||||
# Ev remove this test and make op a mandatory argument -Dan
|
||||
if op is None:
|
||||
op = code[pos]
|
||||
|
||||
# Detect parent structure
|
||||
parent = self.structs[0]
|
||||
start = parent['start']
|
||||
end = parent['end']
|
||||
for s in self.structs:
|
||||
_start = s['start']
|
||||
_end = s['end']
|
||||
if (_start <= pos < _end) and (_start >= start and _end <= end):
|
||||
start = _start
|
||||
end = _end
|
||||
parent = s
|
||||
|
||||
if op == SETUP_LOOP:
|
||||
start = pos+3
|
||||
target = self.get_target(pos, op)
|
||||
end = self.restrict_to_parent(target, parent)
|
||||
|
||||
if target != end:
|
||||
self.fixed_jumps[pos] = end
|
||||
(line_no, next_line_byte) = self.lines[pos]
|
||||
jump_back = self.last_instr(start, end, JA,
|
||||
next_line_byte, False)
|
||||
|
||||
if jump_back and jump_back != self.prev[end] and code[jump_back+3] in (JA, JF):
|
||||
if code[self.prev[end]] == RETURN_VALUE or \
|
||||
(code[self.prev[end]] == POP_BLOCK and code[self.prev[self.prev[end]]] == RETURN_VALUE):
|
||||
jump_back = None
|
||||
if not jump_back: # loop suite ends in return. wtf right?
|
||||
jump_back = self.last_instr(start, end, RETURN_VALUE) + 1
|
||||
if not jump_back:
|
||||
return
|
||||
if code[self.prev[next_line_byte]] not in (PJIF, PJIT):
|
||||
loop_type = 'for'
|
||||
else:
|
||||
loop_type = 'while'
|
||||
self.ignore_if.add(self.prev[next_line_byte])
|
||||
target = next_line_byte
|
||||
end = jump_back + 3
|
||||
else:
|
||||
if self.get_target(jump_back) >= next_line_byte:
|
||||
jump_back = self.last_instr(start, end, JA, start, False)
|
||||
if end > jump_back+4 and code[end] in (JF, JA):
|
||||
if code[jump_back+4] in (JA, JF):
|
||||
if self.get_target(jump_back+4) == self.get_target(end):
|
||||
self.fixed_jumps[pos] = jump_back+4
|
||||
end = jump_back+4
|
||||
elif target < pos:
|
||||
self.fixed_jumps[pos] = jump_back+4
|
||||
end = jump_back+4
|
||||
target = self.get_target(jump_back, JA)
|
||||
|
||||
if code[target] in (FOR_ITER, GET_ITER):
|
||||
loop_type = 'for'
|
||||
else:
|
||||
loop_type = 'while'
|
||||
test = self.prev[next_line_byte]
|
||||
if test == pos:
|
||||
loop_type = 'while 1'
|
||||
elif self.code[test] in hasjabs+hasjrel:
|
||||
self.ignore_if.add(test)
|
||||
test_target = self.get_target(test)
|
||||
if test_target > (jump_back+3):
|
||||
jump_back = test_target
|
||||
self.not_continue.add(jump_back)
|
||||
self.loops.append(target)
|
||||
self.structs.append({'type': loop_type + '-loop',
|
||||
'start': target,
|
||||
'end': jump_back})
|
||||
if jump_back+3 != end:
|
||||
self.structs.append({'type': loop_type + '-else',
|
||||
'start': jump_back+3,
|
||||
'end': end})
|
||||
elif op == SETUP_EXCEPT:
|
||||
start = pos+3
|
||||
target = self.get_target(pos, op)
|
||||
end = self.restrict_to_parent(target, parent)
|
||||
if target != end:
|
||||
self.fixed_jumps[pos] = end
|
||||
# print target, end, parent
|
||||
# Add the try block
|
||||
self.structs.append({'type': 'try',
|
||||
'start': start,
|
||||
'end': end-4})
|
||||
# Now isolate the except and else blocks
|
||||
end_else = start_else = self.get_target(self.prev[end])
|
||||
|
||||
# Add the except blocks
|
||||
i = end
|
||||
while self.code[i] != END_FINALLY:
|
||||
jmp = self.next_except_jump(i)
|
||||
if self.code[jmp] == RETURN_VALUE:
|
||||
self.structs.append({'type': 'except',
|
||||
'start': i,
|
||||
'end': jmp+1})
|
||||
i = jmp + 1
|
||||
else:
|
||||
if self.get_target(jmp) != start_else:
|
||||
end_else = self.get_target(jmp)
|
||||
if self.code[jmp] == JF:
|
||||
self.fixed_jumps[jmp] = -1
|
||||
self.structs.append({'type': 'except',
|
||||
'start': i,
|
||||
'end': jmp})
|
||||
i = jmp + 3
|
||||
|
||||
# Add the try-else block
|
||||
if end_else != start_else:
|
||||
r_end_else = self.restrict_to_parent(end_else, parent)
|
||||
self.structs.append({'type': 'try-else',
|
||||
'start': i+1,
|
||||
'end': r_end_else})
|
||||
self.fixed_jumps[i] = r_end_else
|
||||
else:
|
||||
self.fixed_jumps[i] = i+1
|
||||
|
||||
elif op in (PJIF, PJIT):
|
||||
start = pos+3
|
||||
target = self.get_target(pos, op)
|
||||
rtarget = self.restrict_to_parent(target, parent)
|
||||
pre = self.prev
|
||||
|
||||
if target != rtarget and parent['type'] == 'and/or':
|
||||
self.fixed_jumps[pos] = rtarget
|
||||
return
|
||||
# does this jump to right after another cond jump?
|
||||
# if so, it's part of a larger conditional
|
||||
if (code[pre[target]] in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP,
|
||||
PJIF, PJIT)) and (target > pos):
|
||||
self.fixed_jumps[pos] = pre[target]
|
||||
self.structs.append({'type': 'and/or',
|
||||
'start': start,
|
||||
'end': pre[target]})
|
||||
return
|
||||
|
||||
# is this an if and
|
||||
if op == PJIF:
|
||||
match = self.rem_or(start, self.next_stmt[pos], PJIF, target)
|
||||
match = self.remove_mid_line_ifs(match)
|
||||
|
||||
if match:
|
||||
if code[pre[rtarget]] in (JF, JA) \
|
||||
and pre[rtarget] not in self.stmts \
|
||||
and self.restrict_to_parent(self.get_target(pre[rtarget]), parent) == rtarget:
|
||||
if code[pre[pre[rtarget]]] == JA \
|
||||
and self.remove_mid_line_ifs([pos]) \
|
||||
and target == self.get_target(pre[pre[rtarget]]) \
|
||||
and (pre[pre[rtarget]] not in self.stmts or self.get_target(pre[pre[rtarget]]) > pre[pre[rtarget]])\
|
||||
and 1 == len(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], (PJIF, PJIT), target))):
|
||||
pass
|
||||
elif code[pre[pre[rtarget]]] == RETURN_VALUE \
|
||||
and self.remove_mid_line_ifs([pos]) \
|
||||
and 1 == (len(set(self.remove_mid_line_ifs(self.rem_or(start,
|
||||
pre[pre[rtarget]],
|
||||
(PJIF, PJIT), target)))
|
||||
| set(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]],
|
||||
(PJIF, PJIT, JA), pre[rtarget], True))))):
|
||||
pass
|
||||
else:
|
||||
fix = None
|
||||
jump_ifs = self.all_instr(start, self.next_stmt[pos], PJIF)
|
||||
last_jump_good = True
|
||||
for j in jump_ifs:
|
||||
if target == self.get_target(j):
|
||||
if self.lines[j].next == j+3 and last_jump_good:
|
||||
fix = j
|
||||
break
|
||||
else:
|
||||
last_jump_good = False
|
||||
self.fixed_jumps[pos] = fix or match[-1]
|
||||
return
|
||||
else:
|
||||
self.fixed_jumps[pos] = match[-1]
|
||||
return
|
||||
else: # op == PJIT
|
||||
if (pos+3) in self.load_asserts:
|
||||
if code[pre[rtarget]] == RAISE_VARARGS:
|
||||
return
|
||||
self.load_asserts.remove(pos+3)
|
||||
|
||||
next = self.next_stmt[pos]
|
||||
if pre[next] == pos:
|
||||
pass
|
||||
elif code[next] in (JF, JA) and target == self.get_target(next):
|
||||
if code[pre[next]] == PJIF:
|
||||
if code[next] == JF or target != rtarget or code[pre[pre[rtarget]]] not in (JA, RETURN_VALUE):
|
||||
self.fixed_jumps[pos] = pre[next]
|
||||
return
|
||||
elif code[next] == JA and code[target] in (JA, JF):
|
||||
next_target = self.get_target(next)
|
||||
if self.get_target(target) == next_target:
|
||||
self.fixed_jumps[pos] = pre[next]
|
||||
return
|
||||
elif code[next_target] in (JA, JF) and self.get_target(next_target) == self.get_target(target):
|
||||
self.fixed_jumps[pos] = pre[next]
|
||||
return
|
||||
|
||||
# don't add a struct for a while test, it's already taken care of
|
||||
if pos in self.ignore_if:
|
||||
return
|
||||
|
||||
if code[pre[rtarget]] == JA and pre[rtarget] in self.stmts \
|
||||
and pre[rtarget] != pos and pre[pre[rtarget]] != pos:
|
||||
if code[rtarget] == JA and code[rtarget+3] == POP_BLOCK:
|
||||
if code[pre[pre[rtarget]]] != JA:
|
||||
pass
|
||||
elif self.get_target(pre[pre[rtarget]]) != target:
|
||||
pass
|
||||
else:
|
||||
rtarget = pre[rtarget]
|
||||
else:
|
||||
rtarget = pre[rtarget]
|
||||
# If the 'if' jumps just beyond a jump op, then this is probably an if statement
|
||||
if code[pre[rtarget]] in (JA, JF):
|
||||
if_end = self.get_target(pre[rtarget])
|
||||
|
||||
# is this a loop not an if?
|
||||
if (if_end < pre[rtarget]) and (code[pre[if_end]] == SETUP_LOOP):
|
||||
if(if_end > start):
|
||||
return
|
||||
|
||||
end = self.restrict_to_parent(if_end, parent)
|
||||
|
||||
self.structs.append({'type': 'if-then',
|
||||
'start': start,
|
||||
'end': pre[rtarget]})
|
||||
self.not_continue.add(pre[rtarget])
|
||||
|
||||
if rtarget < end:
|
||||
self.structs.append({'type': 'if-else',
|
||||
'start': rtarget,
|
||||
'end': end})
|
||||
elif code[pre[rtarget]] == RETURN_VALUE:
|
||||
self.structs.append({'type': 'if-then',
|
||||
'start': start,
|
||||
'end': rtarget})
|
||||
self.return_end_ifs.add(pre[rtarget])
|
||||
|
||||
elif op in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP):
|
||||
target = self.get_target(pos, op)
|
||||
self.fixed_jumps[pos] = self.restrict_to_parent(target, parent)
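All of the structures recorded above share one shape; a sketch (with made-up offsets) of what self.structs might contain after scanning a small function with a while loop wrapping an if:

structs = [
    {'type': 'root',       'start': 0,  'end': 63},
    {'type': 'while-loop', 'start': 6,  'end': 48},
    {'type': 'if-then',    'start': 15, 'end': 33},
]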
|
||||
|
||||
def find_jump_targets(self):
|
||||
'''
|
||||
Detect all offsets in a byte code which are jump targets.
|
||||
|
||||
Return the list of offsets.
|
||||
|
||||
This procedure is modelled after dis.findlabels(), but here
|
||||
for each target the number of jumps is counted.
|
||||
'''
|
||||
|
||||
n = len(self.code)
|
||||
self.structs = [{'type': 'root',
|
||||
'start': 0,
|
||||
'end': n-1}]
|
||||
self.loops = [] # All loop entry points
|
||||
self.fixed_jumps = {} # Map fixed jumps to their real destination
|
||||
self.ignore_if = set()
|
||||
self.build_stmt_indices()
|
||||
|
||||
# Containers filled by detect_structure()
|
||||
self.not_continue = set()
|
||||
self.return_end_ifs = set()
|
||||
|
||||
targets = {}
|
||||
for i in self.op_range(0, n):
|
||||
op = self.code[i]
|
||||
|
||||
# Determine structures and fix jumps in Python versions
|
||||
# since 2.3
|
||||
self.detect_structure(i, op)
|
||||
|
||||
if op >= HAVE_ARGUMENT:
|
||||
label = self.fixed_jumps.get(i)
|
||||
oparg = self.code[i+1] + self.code[i+2] * 256
|
||||
if label is None:
|
||||
if op in hasjrel and op != FOR_ITER:
|
||||
label = i + 3 + oparg
|
||||
elif op in hasjabs:
|
||||
if op in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP):
|
||||
if (oparg > i):
|
||||
label = oparg
|
||||
|
||||
if label is not None and label != -1:
|
||||
targets[label] = targets.get(label, []) + [i]
|
||||
elif op == END_FINALLY and i in self.fixed_jumps:
|
||||
label = self.fixed_jumps[i]
|
||||
targets[label] = targets.get(label, []) + [i]
|
||||
return targets
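A sketch of the mapping returned above: each jump-target offset is keyed to the list of offsets that jump to it (made-up offsets; dis.findlabels() by contrast returns only the targets themselves):

targets = {
    13: [3],        # one conditional jump lands at offset 13
    42: [10, 36],   # two different jumps share the target at offset 42
}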
|
||||
|
||||
if __name__ == "__main__":
|
||||
from uncompyle6 import PYTHON_VERSION
|
||||
if PYTHON_VERSION >= 2.3:
|
||||
co = inspect.currentframe().f_code
|
||||
from uncompyle6 import PYTHON_VERSION
|
||||
tokens, customize = Scanner2(PYTHON_VERSION).disassemble(co)
|
||||
for t in tokens:
|
||||
print(t.format())
|
||||
else:
|
||||
print("Need to be Python 2.3 or greater to demo; I am %s." %
PYTHON_VERSION)
|
||||
pass
|
@@ -16,11 +16,11 @@ from array import array

 import dis
 from uncompyle6.opcodes.opcode_25 import *
-import uncompyle6.scanner as scan
+import uncompyle6.scanners.scanner2 as scan

-class Scanner25(scan.Scanner):
+class Scanner25(scan.Scanner2):
     def __init__(self):
-        scan.Scanner.__init__(self, 2.5)
+        super(Scanner25, self).__init__(2.5)

     def disassemble(self, co, classname=None, code_objects={}):
         '''
@@ -16,11 +16,11 @@ from array import array

 from uncompyle6.opcodes.opcode_26 import *
 import dis
-import uncompyle6.scanner as scan
+import uncompyle6.scanners.scanner2 as scan

-class Scanner26(scan.Scanner):
+class Scanner26(scan.Scanner2):
     def __init__(self):
-        scan.Scanner.__init__(self, 2.6)
+        super(Scanner26, self).__init__(2.6)

     def disassemble(self, co, classname=None, code_objects={}):
         '''
|
||||
|
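Taken together, the scanner25/scanner26/scanner27 changes reduce each version-specific scanner to a thin subclass of the new Scanner2; a condensed, runnable sketch of the pattern (the base Scanner is stubbed out here):

class Scanner(object):                     # stand-in for uncompyle6.scanner.Scanner
    def __init__(self, version):
        self.version = version

class Scanner2(Scanner):                   # shared Python 2 scanning logic
    def __init__(self, version):
        Scanner.__init__(self, version)

class Scanner25(Scanner2):
    def __init__(self):
        super(Scanner25, self).__init__(2.5)

class Scanner26(Scanner2):
    def __init__(self):
        super(Scanner26, self).__init__(2.6)

assert Scanner26().version == 2.6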
@@ -1,7 +1,4 @@
|
||||
# Copyright (c) 2015, 2016 by Rocky Bernstein
|
||||
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
|
||||
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
|
||||
# Copyright (c) 1999 John Aycock
|
||||
"""
|
||||
Python 2.7 bytecode scanner/deparser
|
||||
|
||||
@@ -13,722 +10,27 @@ for later use in deparsing.
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
import dis, inspect
|
||||
from collections import namedtuple
|
||||
from array import array
|
||||
from uncompyle6.scanners.scanner2 import Scanner2
|
||||
|
||||
from xdis.code import iscode
|
||||
from uncompyle6.opcodes.opcode_27 import * # NOQA
|
||||
import uncompyle6.scanner as scan
|
||||
# bytecode verification, verify(), uses JUMP_OPs from here
|
||||
from xdis.opcodes import opcode_27
|
||||
JUMP_OPs = opcode_27.JUMP_OPs
|
||||
|
||||
class Scanner27(scan.Scanner):
|
||||
class Scanner27(Scanner2):
|
||||
def __init__(self):
|
||||
scan.Scanner.__init__(self, 2.7)
|
||||
|
||||
def disassemble(self, co, classname=None, code_objects={}):
|
||||
"""
|
||||
Disassemble a Python 2.7 code object, returning a list of 'Token'.
Various transformations are made to assist the deparsing grammar.
For example:
- various types of LOAD_CONST's are categorized in terms of what they load
- COME_FROM instructions are added to assist parsing control structures
- MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments
|
||||
The main part of this procedure is modelled after
|
||||
dis.disassemble().
|
||||
"""
|
||||
|
||||
# import dis; dis.disassemble(co) # DEBUG
|
||||
|
||||
# Container for tokens
|
||||
tokens = []
|
||||
|
||||
customize = {}
|
||||
Token = self.Token # shortcut
|
||||
|
||||
n = self.setup_code(co)
|
||||
self.build_lines_data(co, n)
|
||||
self.build_prev_op(n)
|
||||
|
||||
# self.lines contains (block,addrLastInstr)
|
||||
if classname:
|
||||
classname = '_' + classname.lstrip('_') + '__'
|
||||
|
||||
def unmangle(name):
|
||||
if name.startswith(classname) and name[-2:] != '__':
|
||||
return name[len(classname) - 2:]
|
||||
return name
|
||||
|
||||
free = [ unmangle(name) for name in (co.co_cellvars + co.co_freevars) ]
|
||||
names = [ unmangle(name) for name in co.co_names ]
|
||||
varnames = [ unmangle(name) for name in co.co_varnames ]
|
||||
else:
|
||||
free = co.co_cellvars + co.co_freevars
|
||||
names = co.co_names
|
||||
varnames = co.co_varnames
|
||||
|
||||
self.load_asserts = set()
|
||||
for i in self.op_range(0, n):
|
||||
if self.code[i] == PJIT and self.code[i+3] == LOAD_GLOBAL:
|
||||
if names[self.get_argument(i+3)] == 'AssertionError':
|
||||
self.load_asserts.add(i+3)
|
||||
|
||||
cf = self.find_jump_targets()
|
||||
# contains (code, [addrRefToCode])
|
||||
last_stmt = self.next_stmt[0]
|
||||
i = self.next_stmt[last_stmt]
|
||||
replace = {}
|
||||
while i < n-1:
|
||||
if self.lines[last_stmt].next > i:
|
||||
if self.code[last_stmt] == PRINT_ITEM:
|
||||
if self.code[i] == PRINT_ITEM:
|
||||
replace[i] = 'PRINT_ITEM_CONT'
|
||||
elif self.code[i] == PRINT_NEWLINE:
|
||||
replace[i] = 'PRINT_NEWLINE_CONT'
|
||||
last_stmt = i
|
||||
i = self.next_stmt[i]
|
||||
|
||||
imports = self.all_instr(0, n, (IMPORT_NAME, IMPORT_FROM, IMPORT_STAR))
|
||||
if len(imports) > 1:
|
||||
last_import = imports[0]
|
||||
for i in imports[1:]:
|
||||
if self.lines[last_import].next > i:
|
||||
if self.code[last_import] == IMPORT_NAME == self.code[i]:
|
||||
replace[i] = 'IMPORT_NAME_CONT'
|
||||
last_import = i
|
||||
|
||||
extended_arg = 0
|
||||
for offset in self.op_range(0, n):
|
||||
if offset in cf:
|
||||
k = 0
|
||||
for j in cf[offset]:
|
||||
tokens.append(Token('COME_FROM', None, repr(j),
|
||||
offset="%s_%d" % (offset, k)))
|
||||
k += 1
|
||||
|
||||
op = self.code[offset]
|
||||
op_name = opname[op]
|
||||
|
||||
oparg = None; pattr = None
|
||||
if op >= HAVE_ARGUMENT:
|
||||
oparg = self.get_argument(offset) + extended_arg
|
||||
extended_arg = 0
|
||||
if op == EXTENDED_ARG:
|
||||
extended_arg = oparg * scan.L65536
|
||||
continue
|
||||
if op in hasconst:
|
||||
const = co.co_consts[oparg]
|
||||
if iscode(const):
|
||||
oparg = const
|
||||
if const.co_name == '<lambda>':
|
||||
assert op_name == 'LOAD_CONST'
|
||||
op_name = 'LOAD_LAMBDA'
|
||||
elif const.co_name == '<genexpr>':
|
||||
op_name = 'LOAD_GENEXPR'
|
||||
elif const.co_name == '<dictcomp>':
|
||||
op_name = 'LOAD_DICTCOMP'
|
||||
elif const.co_name == '<setcomp>':
|
||||
op_name = 'LOAD_SETCOMP'
|
||||
# verify() uses 'pattr' for comparison, since 'attr'
|
||||
# now holds Code(const) and thus can not be used
|
||||
# for comparison (todo: think about changing this)
|
||||
# pattr = 'code_object @ 0x%x %s->%s' %\
|
||||
# (id(const), const.co_filename, const.co_name)
|
||||
pattr = '<code_object ' + const.co_name + '>'
|
||||
else:
|
||||
pattr = const
|
||||
elif op in hasname:
|
||||
pattr = names[oparg]
|
||||
elif op in hasjrel:
|
||||
pattr = repr(offset + 3 + oparg)
|
||||
elif op in hasjabs:
|
||||
pattr = repr(oparg)
|
||||
elif op in haslocal:
|
||||
pattr = varnames[oparg]
|
||||
elif op in hascompare:
|
||||
pattr = cmp_op[oparg]
|
||||
elif op in hasfree:
|
||||
pattr = free[oparg]
|
||||
|
||||
if op in (BUILD_LIST, BUILD_TUPLE, BUILD_SET, BUILD_SLICE,
|
||||
UNPACK_SEQUENCE,
|
||||
MAKE_FUNCTION, CALL_FUNCTION, MAKE_CLOSURE,
|
||||
CALL_FUNCTION_VAR, CALL_FUNCTION_KW,
|
||||
CALL_FUNCTION_VAR_KW, DUP_TOPX, RAISE_VARARGS
|
||||
):
|
||||
# CE - Hack for >= 2.5
|
||||
# Now all values loaded via LOAD_CLOSURE are packed into
|
||||
# a tuple before calling MAKE_CLOSURE.
|
||||
if op == BUILD_TUPLE and \
|
||||
self.code[self.prev[offset]] == LOAD_CLOSURE:
|
||||
continue
|
||||
else:
|
||||
op_name = '%s_%d' % (op_name, oparg)
|
||||
if op != BUILD_SLICE:
|
||||
customize[op_name] = oparg
|
||||
elif op == JA:
|
||||
target = self.get_target(offset)
|
||||
if target < offset:
|
||||
if offset in self.stmts and self.code[offset+3] not in (END_FINALLY, POP_BLOCK) \
|
||||
and offset not in self.not_continue:
|
||||
op_name = 'CONTINUE'
|
||||
else:
|
||||
op_name = 'JUMP_BACK'
|
||||
|
||||
elif op == LOAD_GLOBAL:
|
||||
if offset in self.load_asserts:
|
||||
op_name = 'LOAD_ASSERT'
|
||||
elif op == RETURN_VALUE:
|
||||
if offset in self.return_end_ifs:
|
||||
op_name = 'RETURN_END_IF'
|
||||
|
||||
if offset in self.linestartoffsets:
|
||||
linestart = self.linestartoffsets[offset]
|
||||
else:
|
||||
linestart = None
|
||||
|
||||
if offset not in replace:
|
||||
tokens.append(Token(op_name, oparg, pattr, offset, linestart))
|
||||
else:
|
||||
tokens.append(Token(replace[offset], oparg, pattr, offset, linestart))
|
||||
return tokens, customize
|
||||
|
||||
def disassemble_native(self, co, classname=None, code_objects={}):
|
||||
"""
|
||||
Like disassemble3 but doesn't try to adjust any opcodes.
|
||||
"""
|
||||
|
||||
# Container for tokens
|
||||
tokens = []
|
||||
|
||||
customize = {}
|
||||
Token = self.Token # shortcut
|
||||
|
||||
n = self.setup_code(co)
|
||||
self.build_lines_data(co, n)
|
||||
|
||||
# self.lines contains (block,addrLastInstr)
|
||||
if classname:
|
||||
classname = '_' + classname.lstrip('_') + '__'
|
||||
|
||||
def unmangle(name):
|
||||
if name.startswith(classname) and name[-2:] != '__':
|
||||
return name[len(classname) - 2:]
|
||||
return name
|
||||
|
||||
free = [ unmangle(name) for name in (co.co_cellvars + co.co_freevars) ]
|
||||
names = [ unmangle(name) for name in co.co_names ]
|
||||
varnames = [ unmangle(name) for name in co.co_varnames ]
|
||||
else:
|
||||
free = co.co_cellvars + co.co_freevars
|
||||
names = co.co_names
|
||||
varnames = co.co_varnames
|
||||
|
||||
extended_arg = 0
|
||||
for offset in self.op_range(0, n):
|
||||
op = self.code[offset]
|
||||
op_name = opname[op]
|
||||
|
||||
oparg = None; pattr = None
|
||||
if op >= HAVE_ARGUMENT:
|
||||
oparg = self.get_argument(offset) + extended_arg
|
||||
extended_arg = 0
|
||||
if op == EXTENDED_ARG:
|
||||
extended_arg = oparg * scan.L65536
|
||||
continue
|
||||
if op in hasconst:
|
||||
pattr = co.co_consts[oparg]
|
||||
elif op in hasname:
|
||||
pattr = names[oparg]
|
||||
elif op in hasjrel:
|
||||
pattr = repr(offset + 3 + oparg)
|
||||
elif op in hasjabs:
|
||||
pattr = repr(oparg)
|
||||
elif op in haslocal:
|
||||
pattr = varnames[oparg]
|
||||
elif op in hascompare:
|
||||
pattr = cmp_op[oparg]
|
||||
elif op in hasfree:
|
||||
pattr = free[oparg]
|
||||
|
||||
if offset in self.linestartoffsets:
|
||||
linestart = self.linestartoffsets[offset]
|
||||
else:
|
||||
linestart = None
|
||||
|
||||
tokens.append(Token(op_name, oparg, pattr, offset, linestart))
|
||||
pass
|
||||
return tokens, customize
|
||||
|
||||
def setup_code(self, co):
|
||||
"""
|
||||
Creates Python-independent bytecode structure (byte array) in
|
||||
self.code and records previous instruction in self.prev
|
||||
The size of self.code is returned
|
||||
"""
|
||||
self.code = array('B', co.co_code)
|
||||
|
||||
n = -1
|
||||
for i in self.op_range(0, len(self.code)):
|
||||
if self.code[i] in (RETURN_VALUE, END_FINALLY):
|
||||
n = i + 1
|
||||
pass
|
||||
pass
|
||||
assert n > -1, "Didn't find RETURN_VALUE or END_FINALLY FINALLY"
|
||||
self.code = array('B', co.co_code[:n])
|
||||
|
||||
return n
|
||||
|
||||
def build_prev_op(self, n):
|
||||
self.prev = [0]
|
||||
# mapping addresses of instruction & argument
|
||||
for i in self.op_range(0, n):
|
||||
op = self.code[i]
|
||||
self.prev.append(i)
|
||||
if op >= HAVE_ARGUMENT:
|
||||
self.prev.append(i)
|
||||
self.prev.append(i)
|
||||
pass
|
||||
pass
|
||||
|
||||
def build_lines_data(self, co, n):
|
||||
"""
|
||||
Initializes self.lines and self.linesstartoffsets
|
||||
"""
|
||||
self.lines = []
|
||||
linetuple = namedtuple('linetuple', ['l_no', 'next'])
|
||||
|
||||
# linestarts is a tuple of (offset, line number).
|
||||
# Turn that in a has that we can index
|
||||
linestarts = list(dis.findlinestarts(co))
|
||||
self.linestartoffsets = {}
|
||||
for offset, lineno in linestarts:
|
||||
self.linestartoffsets[offset] = lineno
|
||||
|
||||
j = 0
|
||||
(prev_start_byte, prev_line_no) = linestarts[0]
|
||||
for (start_byte, line_no) in linestarts[1:]:
|
||||
while j < start_byte:
|
||||
self.lines.append(linetuple(prev_line_no, start_byte))
|
||||
j += 1
|
||||
prev_line_no = start_byte
|
||||
while j < n:
|
||||
self.lines.append(linetuple(prev_line_no, n))
|
||||
j+=1
|
||||
super(Scanner27, self).__init__(2.7)
|
||||
return
|
||||
|
||||
def build_stmt_indices(self):
|
||||
code = self.code
|
||||
start = 0
|
||||
end = len(code)
|
||||
|
||||
stmt_opcodes = set([
|
||||
SETUP_LOOP, BREAK_LOOP, CONTINUE_LOOP,
|
||||
SETUP_FINALLY, END_FINALLY, SETUP_EXCEPT, SETUP_WITH,
|
||||
POP_BLOCK, STORE_FAST, DELETE_FAST, STORE_DEREF,
|
||||
STORE_GLOBAL, DELETE_GLOBAL, STORE_NAME, DELETE_NAME,
|
||||
STORE_ATTR, DELETE_ATTR, STORE_SUBSCR, DELETE_SUBSCR,
|
||||
RETURN_VALUE, RAISE_VARARGS, POP_TOP,
|
||||
PRINT_EXPR, PRINT_ITEM, PRINT_NEWLINE, PRINT_ITEM_TO, PRINT_NEWLINE_TO,
|
||||
STORE_SLICE_0, STORE_SLICE_1, STORE_SLICE_2, STORE_SLICE_3,
|
||||
DELETE_SLICE_0, DELETE_SLICE_1, DELETE_SLICE_2, DELETE_SLICE_3,
|
||||
JUMP_ABSOLUTE, EXEC_STMT,
|
||||
])
|
||||
|
||||
stmt_opcode_seqs = [(PJIF, JF), (PJIF, JA), (PJIT, JF), (PJIT, JA)]
|
||||
|
||||
designator_ops = set([
|
||||
STORE_FAST, STORE_NAME, STORE_GLOBAL, STORE_DEREF, STORE_ATTR,
|
||||
STORE_SLICE_0, STORE_SLICE_1, STORE_SLICE_2, STORE_SLICE_3,
|
||||
STORE_SUBSCR, UNPACK_SEQUENCE, JA
|
||||
])
|
||||
|
||||
prelim = self.all_instr(start, end, stmt_opcodes)
|
||||
|
||||
stmts = self.stmts = set(prelim)
|
||||
pass_stmts = set()
|
||||
for seq in stmt_opcode_seqs:
|
||||
for i in self.op_range(start, end-(len(seq)+1)):
|
||||
match = True
|
||||
for elem in seq:
|
||||
if elem != code[i]:
|
||||
match = False
|
||||
break
|
||||
i += self.op_size(code[i])
|
||||
|
||||
if match:
|
||||
i = self.prev[i]
|
||||
stmts.add(i)
|
||||
pass_stmts.add(i)
|
||||
|
||||
if pass_stmts:
|
||||
stmt_list = list(stmts)
|
||||
stmt_list.sort()
|
||||
else:
|
||||
stmt_list = prelim
|
||||
last_stmt = -1
|
||||
self.next_stmt = []
|
||||
slist = self.next_stmt = []
|
||||
i = 0
|
||||
for s in stmt_list:
|
||||
if code[s] == JA and s not in pass_stmts:
|
||||
target = self.get_target(s)
|
||||
if target > s or self.lines[last_stmt].l_no == self.lines[s].l_no:
|
||||
stmts.remove(s)
|
||||
continue
|
||||
j = self.prev[s]
|
||||
while code[j] == JA:
|
||||
j = self.prev[j]
|
||||
if code[j] == LIST_APPEND: # list comprehension
|
||||
stmts.remove(s)
|
||||
continue
|
||||
elif code[s] == POP_TOP and code[self.prev[s]] == ROT_TWO:
|
||||
stmts.remove(s)
|
||||
continue
|
||||
elif code[s] in designator_ops:
|
||||
j = self.prev[s]
|
||||
while code[j] in designator_ops:
|
||||
j = self.prev[j]
|
||||
if code[j] == FOR_ITER:
|
||||
stmts.remove(s)
|
||||
continue
|
||||
last_stmt = s
|
||||
slist += [s] * (s-i)
|
||||
i = s
|
||||
slist += [end] * (end-len(slist))
|
||||
|
||||
def next_except_jump(self, start):
|
||||
'''
|
||||
Return the next jump that was generated by an except SomeException:
|
||||
construct in a try...except...else clause or None if not found.
|
||||
'''
|
||||
|
||||
if self.code[start] == DUP_TOP:
|
||||
except_match = self.first_instr(start, len(self.code), POP_JUMP_IF_FALSE)
|
||||
if except_match:
|
||||
jmp = self.prev[self.get_target(except_match)]
|
||||
self.ignore_if.add(except_match)
|
||||
self.not_continue.add(jmp)
|
||||
return jmp
|
||||
|
||||
count_END_FINALLY = 0
|
||||
count_SETUP_ = 0
|
||||
for i in self.op_range(start, len(self.code)):
|
||||
op = self.code[i]
|
||||
if op == END_FINALLY:
|
||||
if count_END_FINALLY == count_SETUP_:
|
||||
assert self.code[self.prev[i]] in (JA, JF, RETURN_VALUE)
|
||||
self.not_continue.add(self.prev[i])
|
||||
return self.prev[i]
|
||||
count_END_FINALLY += 1
|
||||
elif op in (SETUP_EXCEPT, SETUP_WITH, SETUP_FINALLY):
|
||||
count_SETUP_ += 1
|
||||
|
||||
def detect_structure(self, pos, op=None):
|
||||
'''
|
||||
Detect type of block structures and their boundaries to fix optimized jumps
|
||||
in python2.3+
|
||||
'''
|
||||
|
||||
# TODO: check the struct boundaries more precisely -Dan
|
||||
|
||||
code = self.code
|
||||
# Ev remove this test and make op a mandatory argument -Dan
|
||||
if op is None:
|
||||
op = code[pos]
|
||||
|
||||
# Detect parent structure
|
||||
parent = self.structs[0]
|
||||
start = parent['start']
|
||||
end = parent['end']
|
||||
for s in self.structs:
|
||||
_start = s['start']
|
||||
_end = s['end']
|
||||
if (_start <= pos < _end) and (_start >= start and _end <= end):
|
||||
start = _start
|
||||
end = _end
|
||||
parent = s
|
||||
|
||||
if op == SETUP_LOOP:
|
||||
start = pos+3
|
||||
target = self.get_target(pos, op)
|
||||
end = self.restrict_to_parent(target, parent)
|
||||
|
||||
if target != end:
|
||||
self.fixed_jumps[pos] = end
|
||||
(line_no, next_line_byte) = self.lines[pos]
|
||||
jump_back = self.last_instr(start, end, JA,
|
||||
next_line_byte, False)
|
||||
|
||||
if jump_back and jump_back != self.prev[end] and code[jump_back+3] in (JA, JF):
|
||||
if code[self.prev[end]] == RETURN_VALUE or \
|
||||
(code[self.prev[end]] == POP_BLOCK and code[self.prev[self.prev[end]]] == RETURN_VALUE):
|
||||
jump_back = None
|
||||
if not jump_back: # loop suite ends in return. wtf right?
|
||||
jump_back = self.last_instr(start, end, RETURN_VALUE) + 1
|
||||
if not jump_back:
|
||||
return
|
||||
if code[self.prev[next_line_byte]] not in (PJIF, PJIT):
|
||||
loop_type = 'for'
|
||||
else:
|
||||
loop_type = 'while'
|
||||
self.ignore_if.add(self.prev[next_line_byte])
|
||||
target = next_line_byte
|
||||
end = jump_back + 3
|
||||
else:
|
||||
if self.get_target(jump_back) >= next_line_byte:
|
||||
jump_back = self.last_instr(start, end, JA, start, False)
|
||||
if end > jump_back+4 and code[end] in (JF, JA):
|
||||
if code[jump_back+4] in (JA, JF):
|
||||
if self.get_target(jump_back+4) == self.get_target(end):
|
||||
self.fixed_jumps[pos] = jump_back+4
|
||||
end = jump_back+4
|
||||
elif target < pos:
|
||||
self.fixed_jumps[pos] = jump_back+4
|
||||
end = jump_back+4
|
||||
target = self.get_target(jump_back, JA)
|
||||
|
||||
if code[target] in (FOR_ITER, GET_ITER):
|
||||
loop_type = 'for'
|
||||
else:
|
||||
loop_type = 'while'
|
||||
test = self.prev[next_line_byte]
|
||||
if test == pos:
|
||||
loop_type = 'while 1'
|
||||
elif self.code[test] in hasjabs+hasjrel:
|
||||
self.ignore_if.add(test)
|
||||
test_target = self.get_target(test)
|
||||
if test_target > (jump_back+3):
|
||||
jump_back = test_target
|
||||
self.not_continue.add(jump_back)
|
||||
self.loops.append(target)
|
||||
self.structs.append({'type': loop_type + '-loop',
|
||||
'start': target,
|
||||
'end': jump_back})
|
||||
            if jump_back+3 != end:
                self.structs.append({'type': loop_type + '-else',
                                     'start': jump_back+3,
                                     'end': end})
        elif op == SETUP_EXCEPT:
            start = pos+3
            target = self.get_target(pos, op)
            end = self.restrict_to_parent(target, parent)
            if target != end:
                self.fixed_jumps[pos] = end
            # print target, end, parent
            # Add the try block
            self.structs.append({'type': 'try',
                                 'start': start,
                                 'end': end-4})
            # Now isolate the except and else blocks
            end_else = start_else = self.get_target(self.prev[end])

            # Add the except blocks
            i = end
            while self.code[i] != END_FINALLY:
                jmp = self.next_except_jump(i)
                if self.code[jmp] == RETURN_VALUE:
                    self.structs.append({'type': 'except',
                                         'start': i,
                                         'end': jmp+1})
                    i = jmp + 1
                else:
                    if self.get_target(jmp) != start_else:
                        end_else = self.get_target(jmp)
                    if self.code[jmp] == JF:
                        self.fixed_jumps[jmp] = -1
                    self.structs.append({'type': 'except',
                                         'start': i,
                                         'end': jmp})
                    i = jmp + 3

            # Add the try-else block
            if end_else != start_else:
                r_end_else = self.restrict_to_parent(end_else, parent)
                self.structs.append({'type': 'try-else',
                                     'start': i+1,
                                     'end': r_end_else})
                self.fixed_jumps[i] = r_end_else
            else:
                self.fixed_jumps[i] = i+1

        elif op in (PJIF, PJIT):
            start = pos+3
            target = self.get_target(pos, op)
            rtarget = self.restrict_to_parent(target, parent)
            pre = self.prev

            if target != rtarget and parent['type'] == 'and/or':
                self.fixed_jumps[pos] = rtarget
                return
            # does this jump to right after another cond jump?
            # if so, it's part of a larger conditional
            if (code[pre[target]] in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP,
                                      PJIF, PJIT)) and (target > pos):
                self.fixed_jumps[pos] = pre[target]
                self.structs.append({'type': 'and/or',
                                     'start': start,
                                     'end': pre[target]})
                return

            # is this an if and
            if op == PJIF:
                match = self.rem_or(start, self.next_stmt[pos], PJIF, target)
                match = self.remove_mid_line_ifs(match)

                if match:
                    if code[pre[rtarget]] in (JF, JA) \
                            and pre[rtarget] not in self.stmts \
                            and self.restrict_to_parent(self.get_target(pre[rtarget]), parent) == rtarget:
                        if code[pre[pre[rtarget]]] == JA \
                                and self.remove_mid_line_ifs([pos]) \
                                and target == self.get_target(pre[pre[rtarget]]) \
                                and (pre[pre[rtarget]] not in self.stmts or self.get_target(pre[pre[rtarget]]) > pre[pre[rtarget]]) \
                                and 1 == len(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], (PJIF, PJIT), target))):
                            pass
                        elif code[pre[pre[rtarget]]] == RETURN_VALUE \
                                and self.remove_mid_line_ifs([pos]) \
                                and 1 == (len(set(self.remove_mid_line_ifs(self.rem_or(start,
                                                                                       pre[pre[rtarget]],
                                                                                       (PJIF, PJIT), target)))
                                              | set(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]],
                                                                                         (PJIF, PJIT, JA), pre[rtarget], True))))):
                            pass
                        else:
                            fix = None
                            jump_ifs = self.all_instr(start, self.next_stmt[pos], PJIF)
                            last_jump_good = True
                            for j in jump_ifs:
                                if target == self.get_target(j):
                                    if self.lines[j].next == j+3 and last_jump_good:
                                        fix = j
                                        break
                                else:
                                    last_jump_good = False
                            self.fixed_jumps[pos] = fix or match[-1]
                            return
                    else:
                        self.fixed_jumps[pos] = match[-1]
                        return
            else: # op == PJIT
                if (pos+3) in self.load_asserts:
                    if code[pre[rtarget]] == RAISE_VARARGS:
                        return
                    self.load_asserts.remove(pos+3)

                next = self.next_stmt[pos]
                if pre[next] == pos:
                    pass
                elif code[next] in (JF, JA) and target == self.get_target(next):
                    if code[pre[next]] == PJIF:
                        if code[next] == JF or target != rtarget or code[pre[pre[rtarget]]] not in (JA, RETURN_VALUE):
                            self.fixed_jumps[pos] = pre[next]
                            return
                elif code[next] == JA and code[target] in (JA, JF):
                    next_target = self.get_target(next)
                    if self.get_target(target) == next_target:
                        self.fixed_jumps[pos] = pre[next]
                        return
                    elif code[next_target] in (JA, JF) and self.get_target(next_target) == self.get_target(target):
                        self.fixed_jumps[pos] = pre[next]
                        return

            # don't add a struct for a while test, it's already taken care of
            if pos in self.ignore_if:
                return

            if code[pre[rtarget]] == JA and pre[rtarget] in self.stmts \
                    and pre[rtarget] != pos and pre[pre[rtarget]] != pos:
                if code[rtarget] == JA and code[rtarget+3] == POP_BLOCK:
                    if code[pre[pre[rtarget]]] != JA:
                        pass
                    elif self.get_target(pre[pre[rtarget]]) != target:
                        pass
                    else:
                        rtarget = pre[rtarget]
                else:
                    rtarget = pre[rtarget]
            # if the test jumps to just beyond a jump op, then this is probably an if statement
            if code[pre[rtarget]] in (JA, JF):
                if_end = self.get_target(pre[rtarget])

                # is this a loop not an if?
                if (if_end < pre[rtarget]) and (code[pre[if_end]] == SETUP_LOOP):
                    if(if_end > start):
                        return

                end = self.restrict_to_parent(if_end, parent)

                self.structs.append({'type': 'if-then',
                                     'start': start,
                                     'end': pre[rtarget]})
                self.not_continue.add(pre[rtarget])

                if rtarget < end:
                    self.structs.append({'type': 'if-else',
                                         'start': rtarget,
                                         'end': end})
            elif code[pre[rtarget]] == RETURN_VALUE:
                self.structs.append({'type': 'if-then',
                                     'start': start,
                                     'end': rtarget})
                self.return_end_ifs.add(pre[rtarget])

        elif op in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP):
            target = self.get_target(pos, op)
            self.fixed_jumps[pos] = self.restrict_to_parent(target, parent)

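    # Editor's illustrative note (not in the original source): entries that
    # detect_structure() appends to self.structs have the shape
    #
    #     {'type': 'if-then', 'start': <first offset>, 'end': <last offset>}
    #
    # while self.fixed_jumps maps a jump's own offset to the destination the
    # parser should use for it, e.g. {36: 51}.  The numbers here are made up.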
    def find_jump_targets(self):
        '''
        Detect all offsets in the supplied bytecode which are jump targets.

        Return a dict mapping each target offset to the list of offsets
        of the jumps that reach it.

        This procedure is modelled after dis.findlabels(), but here the
        jumps to each target are recorded as well, not just the targets
        themselves.  (An illustrative comparison follows this method.)
        '''

        n = len(self.code)
        self.structs = [{'type': 'root',
                         'start': 0,
                         'end': n-1}]
        self.loops = []  # All loop entry points
        self.fixed_jumps = {}  # Map fixed jumps to their real destination
        self.ignore_if = set()
        self.build_stmt_indices()

        # Containers filled by detect_structure()
        self.not_continue = set()
        self.return_end_ifs = set()

        targets = {}
        for i in self.op_range(0, n):
            op = self.code[i]

            # Determine structures and fix jumps in Python versions
            # since 2.3
            self.detect_structure(i, op)

            if op >= HAVE_ARGUMENT:
                label = self.fixed_jumps.get(i)
                oparg = self.code[i+1] + self.code[i+2] * 256
                if label is None:
                    if op in hasjrel and op != FOR_ITER:
                        label = i + 3 + oparg
                    elif op in hasjabs:
                        if op in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP):
                            if (oparg > i):
                                label = oparg

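                # Editor's illustrative note: with CPython's 2-byte arguments,
                # a relative jump such as JUMP_FORWARD at offset 10 with
                # oparg 6 lands at 10 + 3 + 6 = 19, while an absolute jump's
                # label is simply its oparg.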
                if label is not None and label != -1:
                    targets[label] = targets.get(label, []) + [i]
            elif op == END_FINALLY and i in self.fixed_jumps:
                label = self.fixed_jumps[i]
                targets[label] = targets.get(label, []) + [i]
        return targets
    pass

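# Editor's illustrative sketch (not part of the original module): the stdlib
# helper dis.findlabels() returns only a list of target offsets, whereas
# find_jump_targets() above also records which jumps reach each target.
# The offsets shown below are hypothetical.
#
#     import dis
#     def _demo(a):
#         if a:
#             return 1
#         return 2
#     dis.findlabels(_demo.__code__.co_code)   # e.g. a list like [10]
#     # find_jump_targets() would instead yield something like {10: [3]}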
if __name__ == "__main__":
    co = inspect.currentframe().f_code
    tokens, customize = Scanner27().disassemble(co)
    for t in tokens:
        print(t)
    pass
    from uncompyle6 import PYTHON_VERSION
    if PYTHON_VERSION == 2.7:
        import inspect
        co = inspect.currentframe().f_code
        tokens, customize = Scanner27().disassemble(co)
        for t in tokens:
            print(t.format())
        pass
    else:
        print("Need to be Python 2.7 to demo; I am %s." %
              PYTHON_VERSION)

@@ -1,7 +1,6 @@
# Copyright (c) 2015, 2016 by Rocky Bernstein
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
# Copyright (c) 1999 John Aycock
"""
Python 3 Generic bytecode scanner/deparser

@@ -29,8 +28,6 @@ from array import array
from xdis.code import iscode
from xdis.bytecode import Bytecode, findlinestarts
from uncompyle6.scanner import Token
from uncompyle6 import PYTHON3


# Get all the opcodes into globals
import xdis.opcodes.opcode_33 as op3
@@ -205,213 +202,6 @@ class Scanner3(scan.Scanner):
            pass
        return tokens, {}

    def disassemble_generic(self, co, classname=None, code_objects={}):
        """
        Convert code object <co> into a sequence of tokens.

        The below is based on (an older version?) of Python dis.disassemble_bytes().
        """

        # dis.disassemble(co) # DEBUG
        # Container for tokens
        tokens = []
        customize = {}
        self.code = code = array('B', co.co_code)
        codelen = len(code)
        self.build_lines_data(co)
        self.build_prev_op()
        self.code_objects = code_objects

        # self.lines contains (block,addrLastInstr)
        if classname:
            classname = '_' + classname.lstrip('_') + '__'

            def unmangle(name):
                if name.startswith(classname) and name[-2:] != '__':
                    return name[len(classname) - 2:]
                return name

            free = [ unmangle(name) for name in (co.co_cellvars + co.co_freevars) ]
            names = [ unmangle(name) for name in co.co_names ]
            varnames = [ unmangle(name) for name in co.co_varnames ]
        else:
            free = co.co_cellvars + co.co_freevars
            names = co.co_names
            varnames = co.co_varnames
            pass

        # Scan for assertions. Later we will
        # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT' for those
        # assertions
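        # Editor's illustrative note: the pattern looked for below is roughly
        # what CPython emits for a bare assert, schematically (offsets made up):
        #
        #     0 LOAD_FAST            x
        #     3 POP_JUMP_IF_TRUE     12
        #     6 LOAD_GLOBAL          AssertionError
        #     9 RAISE_VARARGS        1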
        self.load_asserts = set()
        for i in self.op_range(0, codelen):
            if (self.code[i] == POP_JUMP_IF_TRUE and
                self.code[i+3] == LOAD_GLOBAL):
                if names[self.get_argument(i+3)] == 'AssertionError':
                    self.load_asserts.add(i+3)

        # Get jump targets
        # Format: {target offset: [jump offsets]}
        jump_targets = self.find_jump_targets()

        # contains (code, [addrRefToCode])
        last_stmt = self.next_stmt[0]
        i = self.next_stmt[last_stmt]
        replace = {}

        imports = self.all_instr(0, codelen, (IMPORT_NAME, IMPORT_FROM, IMPORT_STAR))
        if len(imports) > 1:
            last_import = imports[0]
            for i in imports[1:]:
                if self.lines[last_import].next > i:
                    if self.code[last_import] == IMPORT_NAME == self.code[i]:
                        replace[i] = 'IMPORT_NAME_CONT'
                last_import = i
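                # Editor's illustrative note: a statement like "import os, sys"
                # compiles to two IMPORT_NAME ops on the same source line;
                # renaming the second to IMPORT_NAME_CONT presumably lets the
                # grammar distinguish a comma-continued import from a separate
                # import statement.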

        # Initialize extended_arg to 0. When an EXTENDED_ARG op is
        # encountered, its value is kept for the next iteration and added
        # to the following op's argument.
        extended_arg = 0
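        # Editor's illustrative note: e.g. EXTENDED_ARG 1 followed by
        # JUMP_ABSOLUTE 10 decodes to the single argument
        # 1 * 65536 + 10 = 65546 (see the EXTENDED_ARG handling below).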

        for offset in self.op_range(0, codelen):

            # Add jump target tokens
            if offset in jump_targets:
                jump_idx = 0
                for jump_offset in jump_targets[offset]:
                    tokens.append(Token('COME_FROM', None, repr(jump_offset),
                                        offset='%s_%s' % (offset, jump_idx)))
                    jump_idx += 1
                    pass
                pass
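            # Editor's illustrative note: if offset 30 were jumped to from
            # offsets 10 and 22, two COME_FROM pseudo-tokens would be emitted
            # here with token offsets '30_0' and '30_1' (numbers made up).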

            op = code[offset]
            op_name = self.opname[op]

            oparg = None; pattr = None

            if op >= op3.HAVE_ARGUMENT:
                oparg = self.get_argument(offset) + extended_arg
                extended_arg = 0
                if op == op3.EXTENDED_ARG:
                    extended_arg = oparg * scan.L65536
                    continue
                if op in op3.hasconst:
                    const = co.co_consts[oparg]
                    if not PYTHON3 and isinstance(const, str):
                        if const in code_objects:
                            const = code_objects[const]
                    if iscode(const):
                        oparg = const
                        if const.co_name == '<lambda>':
                            assert op_name == 'LOAD_CONST'
                            op_name = 'LOAD_LAMBDA'
                        elif const.co_name == '<genexpr>':
                            op_name = 'LOAD_GENEXPR'
                        elif const.co_name == '<dictcomp>':
                            op_name = 'LOAD_DICTCOMP'
                        elif const.co_name == '<setcomp>':
                            op_name = 'LOAD_SETCOMP'
                        elif const.co_name == '<listcomp>':
                            op_name = 'LOAD_LISTCOMP'
                        # verify() uses 'pattr' for comparison, since 'attr'
                        # now holds Code(const) and thus can not be used
                        # for comparison (todo: think about changing this)
                        # pattr = 'code_object @ 0x%x %s->%s' %\
                        # (id(const), const.co_filename, const.co_name)
                        pattr = '<code_object ' + const.co_name + '>'
                    else:
                        pattr = const
                elif op in op3.hasname:
                    pattr = names[oparg]
                elif op in op3.hasjrel:
                    pattr = repr(offset + 3 + oparg)
                elif op in op3.hasjabs:
                    pattr = repr(oparg)
                elif op in op3.haslocal:
                    pattr = varnames[oparg]
                elif op in op3.hascompare:
                    pattr = op3.cmp_op[oparg]
                elif op in op3.hasfree:
                    pattr = free[oparg]

            if op_name == 'MAKE_FUNCTION':
                argc = oparg
                attr = ((argc & 0xFF), (argc >> 8) & 0xFF, (argc >> 16) & 0x7FFF)
                pos_args, name_pair_args, annotate_args = attr
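                # Editor's illustrative note: e.g. argc 0x010203 unpacks to
                # pos_args = 3, name_pair_args = 2, annotate_args = 1
                # (0x010203 & 0xFF, (0x010203 >> 8) & 0xFF, 0x010203 >> 16).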
                if name_pair_args > 0:
                    op_name = 'MAKE_FUNCTION_N%d' % name_pair_args
                    pass
                if annotate_args > 0:
                    op_name = '%s_A_%d' % (op_name, annotate_args)
                    pass
                op_name = '%s_%d' % (op_name, pos_args)
                pattr = ("%d positional, %d keyword pair, %d annotated" %
                         (pos_args, name_pair_args, annotate_args))
                tokens.append(
                    Token(
                        type_ = op_name,
                        attr = (pos_args, name_pair_args, annotate_args),
                        pattr = pattr,
                        offset = offset,
                        linestart = linestart)
                    )
                continue
            elif op_name in ('BUILD_LIST', 'BUILD_TUPLE', 'BUILD_SET', 'BUILD_SLICE',
                             'UNPACK_SEQUENCE',
                             'MAKE_FUNCTION', 'CALL_FUNCTION', 'MAKE_CLOSURE',
                             'CALL_FUNCTION_VAR', 'CALL_FUNCTION_KW',
                             'CALL_FUNCTION_VAR_KW', 'RAISE_VARARGS'
                             ):
                # CALL_FUNCTION OP renaming is done as a custom rule in parse3
                if op_name not in ('CALL_FUNCTION', 'CALL_FUNCTION_VAR',
                                   'CALL_FUNCTION_VAR_KW', 'CALL_FUNCTION_KW',
                                   ):
                    op_name = '%s_%d' % (op_name, oparg)
                    if op_name != 'BUILD_SLICE':
                        customize[op_name] = oparg
            elif op_name == 'JUMP_ABSOLUTE':
                target = self.get_target(offset)
                if target < offset:
                    if (offset in self.stmts
                            and self.code[offset+3] not in (END_FINALLY, POP_BLOCK)
                            and offset not in self.not_continue):
                        op_name = 'CONTINUE'
                    else:
                        op_name = 'JUMP_BACK'
                        pass
                    pass
                pass
            elif op_name == 'JUMP_FORWARD':
                # Python 3.5 will optimize out a JUMP_FORWARD to the
                # next instruction while Python 3.2 won't. Simplify
                # grammar rules working with both 3.2 and 3.5,
                # by optimizing the way Python 3.5 does it.
                #
                # We may however want to consider whether we do
                # this in 3.5 or not.
                if oparg == 0 and self.version >= 3.5:
                    tokens.append(Token('NOP', oparg, pattr, offset, linestart))
                    continue
            elif op_name == 'LOAD_GLOBAL':
                if offset in self.load_asserts:
                    op_name = 'LOAD_ASSERT'

            if offset in self.linestarts:
                linestart = self.linestarts[offset]
            else:
                linestart = None

            if offset not in replace:
                tokens.append(Token(op_name, oparg, pattr, offset, linestart))
            else:
                tokens.append(Token(replace[offset], oparg, pattr, offset, linestart))
                pass

        # debug:
        # for t in tokens:
        #     print(t)
        return tokens, customize

    def build_lines_data(self, code_obj):
        """
        Generate various line-related helper data.
@@ -905,7 +695,7 @@ if __name__ == "__main__":
    import inspect
    co = inspect.currentframe().f_code
    from uncompyle6 import PYTHON_VERSION
    tokens, customize = Scanner3(PYTHON_VERSION).disassemble3(co)
    tokens, customize = Scanner3(PYTHON_VERSION).disassemble(co)
    for t in tokens:
        print(t.format())
else: