Merge branch 'python-2.3'

This commit is contained in:
rocky
2016-07-09 09:17:17 -04:00
7 changed files with 34 additions and 1201 deletions

View File

@@ -51,7 +51,7 @@ check-bytecode-3:
#: Check deparsing bytecode that works running Python 2 and Python 3
check-bytecode: check-bytecode-3
$(PYTHON) test_pythonlib.py --bytecode-2.4 --bytecode-2.5 --bytecode-2.6 --bytecode-2.7
$(PYTHON) test_pythonlib.py --bytecode-2.3 --bytecode-2.4 --bytecode-2.5 --bytecode-2.6 --bytecode-2.7
#: Check deparsing Python 2.3
check-bytecode-2.3:

View File

@@ -24,7 +24,7 @@ class Python2Parser(PythonParser):
super(Python2Parser, self).__init__(AST, 'stmts', debug=debug_parser)
self.new_rules = set()
def p_print(self, args):
def p_print2(self, args):
'''
stmt ::= print_items_stmt
stmt ::= print_nl

View File

@@ -6,495 +6,12 @@ from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG
from uncompyle6.parser import PythonParserSingle
from uncompyle6.parsers.parse2 import Python2Parser
class Python23Parser(PythonParser):
class Python23Parser(Python24Parser):
def __init__(self, debug_parser=PARSER_DEFAULT_DEBUG):
super(Python23Parser, self).__init__(AST, 'stmts', debug=debug_parser)
super(Python24Parser, self).__init__(debug_parser)
self.customized = {}
# FIXME: A lot of the functions below overwrite what is in parse.py which
# have more rules. Probly that should be stripped down more instead.
def p_funcdef(self, args):
    # Grammar rules for function definitions and LOAD_CLOSURE sequences.
    # NOTE: the docstring is rule text in ``lhs ::= rhs`` form (presumably
    # consumed by the spark parser framework), so keep it verbatim.
    '''
    stmt ::= funcdef
    funcdef ::= mkfunc designator
    load_closure ::= load_closure LOAD_CLOSURE
    load_closure ::= LOAD_CLOSURE
    '''
def p_list_comprehension(self, args):
    # Grammar rules for list comprehensions: the BUILD_LIST_0 prologue,
    # the for/if iteration clauses and the appending body.
    # NOTE: the docstring is rule text in ``lhs ::= rhs`` form; keep it verbatim.
    '''
    expr ::= list_compr
    list_compr ::= BUILD_LIST_0 DUP_TOP _load_attr
            designator list_iter del_stmt
    list_iter ::= list_for
    list_iter ::= list_if
    list_iter ::= lc_body
    _load_attr ::= LOAD_ATTR
    _load_attr ::=
    _lcfor ::= GET_ITER LIST_COMPREHENSION_START FOR_ITER
    _lcfor ::= LOAD_CONST FOR_LOOP
    _lcfor2 ::= GET_ITER FOR_ITER
    _lcfor2 ::= LOAD_CONST FOR_LOOP
    list_for ::= expr _lcfor designator list_iter
            LIST_COMPREHENSION_END JUMP_ABSOLUTE
    list_for ::= expr _lcfor2 designator list_iter
            JUMP_ABSOLUTE
    list_if ::= expr condjmp IF_THEN_START list_iter
            IF_THEN_END _jump POP_TOP IF_ELSE_START IF_ELSE_END
    lc_body ::= LOAD_NAME expr CALL_FUNCTION_1 POP_TOP
    lc_body ::= LOAD_FAST expr CALL_FUNCTION_1 POP_TOP
    lc_body ::= LOAD_NAME expr LIST_APPEND
    lc_body ::= LOAD_FAST expr LIST_APPEND
    '''
def p_augmented_assign(self, args):
    # Grammar rules for augmented assignment: the store forms (name,
    # subscript, slice, attribute) and the INPLACE_* operators.
    # NOTE: the docstring is rule text in ``lhs ::= rhs`` form; keep it verbatim.
    '''
    stmt ::= augassign1
    stmt ::= augassign2
    augassign1 ::= expr expr inplace_op designator
    augassign1 ::= expr expr inplace_op ROT_THREE STORE_SUBSCR
    augassign1 ::= expr expr inplace_op ROT_TWO STORE_SLICE+0
    augassign1 ::= expr expr inplace_op ROT_THREE STORE_SLICE+1
    augassign1 ::= expr expr inplace_op ROT_THREE STORE_SLICE+2
    augassign1 ::= expr expr inplace_op ROT_FOUR STORE_SLICE+3
    augassign2 ::= expr DUP_TOP LOAD_ATTR expr
            inplace_op ROT_TWO STORE_ATTR
    inplace_op ::= INPLACE_ADD
    inplace_op ::= INPLACE_SUBTRACT
    inplace_op ::= INPLACE_MULTIPLY
    inplace_op ::= INPLACE_DIVIDE
    inplace_op ::= INPLACE_TRUE_DIVIDE
    inplace_op ::= INPLACE_FLOOR_DIVIDE
    inplace_op ::= INPLACE_MODULO
    inplace_op ::= INPLACE_POWER
    inplace_op ::= INPLACE_LSHIFT
    inplace_op ::= INPLACE_RSHIFT
    inplace_op ::= INPLACE_AND
    inplace_op ::= INPLACE_XOR
    inplace_op ::= INPLACE_OR
    '''
def p_assign(self, args):
    # Grammar rules for plain assignment, with or without a DUP_TOP
    # followed by a designList.
    # NOTE: the docstring is rule text in ``lhs ::= rhs`` form; keep it verbatim.
    '''
    stmt ::= assign
    assign ::= expr DUP_TOP designList
    assign ::= expr designator
    '''
def p_print(self, args):
    # Grammar rules for the print statement built from PRINT_ITEM and
    # PRINT_NEWLINE.
    # NOTE: the docstring is rule text in ``lhs ::= rhs`` form; keep it verbatim.
    '''
    stmt ::= print_stmt
    stmt ::= print_stmt_nl
    stmt ::= print_nl_stmt
    print_stmt ::= expr PRINT_ITEM
    print_nl_stmt ::= PRINT_NEWLINE
    print_stmt_nl ::= print_stmt print_nl_stmt
    '''
def p_print_to(self, args):
    # Grammar rules for the print forms built from PRINT_ITEM_TO and
    # PRINT_NEWLINE_TO.
    # NOTE: the docstring is rule text in ``lhs ::= rhs`` form; keep it verbatim.
    '''
    stmt ::= print_to
    stmt ::= print_to_nl
    stmt ::= print_nl_to
    print_to ::= expr print_to_items POP_TOP
    print_to_nl ::= expr print_to_items PRINT_NEWLINE_TO
    print_nl_to ::= expr PRINT_NEWLINE_TO
    print_to_items ::= print_to_items print_to_item
    print_to_items ::= print_to_item
    print_to_item ::= DUP_TOP expr ROT_TWO PRINT_ITEM_TO
    '''
    # expr print_to* POP_TOP
    # expr { print_to* } PRINT_NEWLINE_TO
def p_import15(self, args):
    # Grammar rules for import statements that start directly with
    # IMPORT_NAME (no preceding LOAD_CONST).
    # NOTE: the docstring is rule text in ``lhs ::= rhs`` form; keep it verbatim.
    '''
    stmt ::= importstmt
    stmt ::= importfrom
    importstmt ::= IMPORT_NAME STORE_FAST
    importstmt ::= IMPORT_NAME STORE_NAME
    importfrom ::= IMPORT_NAME importlist POP_TOP
    importlist ::= importlist IMPORT_FROM
    importlist ::= IMPORT_FROM
    '''
# Python 2.0 - 2.3 imports
def p_import20_23(self, args):
    # Grammar rules for import statements where a LOAD_CONST precedes
    # IMPORT_NAME: plain import, ``from ... import ...`` and star import.
    # NOTE: the docstring is rule text in ``lhs ::= rhs`` form; keep it verbatim.
    '''
    stmt ::= importstmt20
    stmt ::= importfrom20
    stmt ::= importstar20
    importstmt20 ::= LOAD_CONST import_as
    importstar20 ::= LOAD_CONST IMPORT_NAME IMPORT_STAR
    importfrom20 ::= LOAD_CONST IMPORT_NAME importlist20 POP_TOP
    importlist20 ::= importlist20 import_as
    importlist20 ::= import_as
    import_as ::= IMPORT_NAME designator
    import_as ::= IMPORT_NAME LOAD_ATTR designator
    import_as ::= IMPORT_FROM designator
    '''
def p_grammar(self, args):
    # Core statement grammar: statement lists, simple statements
    # (call, return, yield, break, continue, raise, exec, del), class
    # definitions, asserts, if/else, try/except/finally, while and for loops.
    # Several rules reference pseudo-tokens such as IF_THEN_START or
    # WHILE_END; these match the "<TYPE>_START"/"<TYPE>_END" names
    # produced by the scanner's find_structures().
    # NOTE: the docstring is rule text in ``lhs ::= rhs`` form; keep it verbatim.
    '''
    stmts ::= stmts stmt
    stmts ::= stmt
    stmts_opt ::= stmts
    stmts_opt ::= passstmt
    passstmt ::=
    stmt ::= classdef
    stmt ::= call_stmt
    call_stmt ::= expr POP_TOP
    stmt ::= return_stmt
    return_stmt ::= expr RETURN_VALUE
    stmt ::= yield_stmt
    yield_stmt ::= expr YIELD_STMT
    yield_stmt ::= expr YIELD_VALUE
    stmt ::= break_stmt
    break_stmt ::= BREAK_LOOP
    stmt ::= continue_stmt
    continue_stmt ::= JUMP_ABSOLUTE
    continue_stmt ::= CONTINUE_LOOP
    stmt ::= raise_stmt
    raise_stmt ::= exprlist RAISE_VARARGS
    raise_stmt ::= RAISE_VARARGS
    stmt ::= exec_stmt
    exec_stmt ::= expr exprlist DUP_TOP EXEC_STMT
    exec_stmt ::= expr exprlist EXEC_STMT
    stmt ::= assert
    stmt ::= assert2
    stmt ::= assert3
    stmt ::= assert4
    stmt ::= ifstmt
    stmt ::= ifelsestmt
    stmt ::= whilestmt
    stmt ::= while1stmt
    stmt ::= while12stmt
    stmt ::= whileelsestmt
    stmt ::= while1elsestmt
    stmt ::= while12elsestmt
    stmt ::= forstmt
    stmt ::= forelsestmt
    stmt ::= trystmt
    stmt ::= tryfinallystmt
    stmt ::= del_stmt
    del_stmt ::= DELETE_FAST
    del_stmt ::= DELETE_NAME
    del_stmt ::= DELETE_GLOBAL
    del_stmt ::= expr DELETE_SLICE+0
    del_stmt ::= expr expr DELETE_SLICE+1
    del_stmt ::= expr expr DELETE_SLICE+2
    del_stmt ::= expr expr expr DELETE_SLICE+3
    del_stmt ::= delete_subscr
    delete_subscr ::= expr expr DELETE_SUBSCR
    del_stmt ::= expr DELETE_ATTR
    kwarg ::= LOAD_CONST expr
    classdef ::= LOAD_CONST expr mkfunc
            CALL_FUNCTION_0 BUILD_CLASS designator
    condjmp ::= JUMP_IF_FALSE POP_TOP
    condjmp ::= JUMP_IF_TRUE POP_TOP
    assert ::= expr JUMP_IF_FALSE POP_TOP
            LOGIC_TEST_START expr JUMP_IF_TRUE POP_TOP
            LOGIC_TEST_START LOAD_GLOBAL RAISE_VARARGS
            LOGIC_TEST_END LOGIC_TEST_END POP_TOP
    assert2 ::= expr JUMP_IF_FALSE POP_TOP
            LOGIC_TEST_START expr JUMP_IF_TRUE POP_TOP
            LOGIC_TEST_START LOAD_GLOBAL expr RAISE_VARARGS
            LOGIC_TEST_END LOGIC_TEST_END POP_TOP
    assert3 ::= expr JUMP_IF_TRUE POP_TOP
            LOGIC_TEST_START LOAD_GLOBAL RAISE_VARARGS
            LOGIC_TEST_END POP_TOP
    assert4 ::= expr JUMP_IF_TRUE POP_TOP
            LOGIC_TEST_START LOAD_GLOBAL expr RAISE_VARARGS
            LOGIC_TEST_END POP_TOP
    _jump ::= JUMP_ABSOLUTE
    _jump ::= JUMP_FORWARD
    ifstmt ::= expr condjmp
            IF_THEN_START stmts_opt IF_THEN_END
            _jump POP_TOP IF_ELSE_START IF_ELSE_END
    ifelsestmt ::= expr condjmp
            IF_THEN_START stmts_opt IF_THEN_END
            _jump POP_TOP IF_ELSE_START stmts IF_ELSE_END
    trystmt ::= SETUP_EXCEPT TRY_START stmts_opt
            TRY_END POP_BLOCK _jump
            except_stmt
    try_end ::= END_FINALLY TRY_ELSE_START TRY_ELSE_END
    try_end ::= except_else
    except_else ::= END_FINALLY TRY_ELSE_START stmts TRY_ELSE_END
    except_stmt ::= except_stmt except_cond
    except_stmt ::= except_conds try_end
    except_stmt ::= except try_end
    except_stmt ::= try_end
    except_conds ::= except_conds except_cond
    except_conds ::=
    except_cond ::= except_cond1
    except_cond ::= except_cond2
    except_cond1 ::= EXCEPT_START DUP_TOP expr COMPARE_OP
            JUMP_IF_FALSE
            POP_TOP POP_TOP POP_TOP POP_TOP
            stmts_opt EXCEPT_END _jump POP_TOP
    except_cond2 ::= EXCEPT_START DUP_TOP expr COMPARE_OP
            JUMP_IF_FALSE
            POP_TOP POP_TOP designator POP_TOP
            stmts_opt EXCEPT_END _jump POP_TOP
    except ::= EXCEPT_START POP_TOP POP_TOP POP_TOP
            stmts_opt EXCEPT_END _jump
    tryfinallystmt ::= SETUP_FINALLY stmts_opt
            POP_BLOCK LOAD_CONST
            stmts_opt END_FINALLY
    _while1test ::= _jump JUMP_IF_FALSE POP_TOP
    _while1test ::=
    whilestmt ::= SETUP_LOOP WHILE_START
            expr condjmp
            stmts_opt WHILE_END JUMP_ABSOLUTE
            WHILE_ELSE_START POP_TOP POP_BLOCK WHILE_ELSE_END
    while1stmt ::= SETUP_LOOP _while1test WHILE1_START
            stmts_opt WHILE1_END JUMP_ABSOLUTE
            WHILE1_ELSE_START POP_TOP POP_BLOCK WHILE1_ELSE_END
    while12stmt ::= SETUP_LOOP WHILE1_START
            _jump JUMP_IF_FALSE POP_TOP
            stmts_opt WHILE1_END JUMP_ABSOLUTE
            WHILE1_ELSE_START POP_TOP POP_BLOCK WHILE1_ELSE_END
    whileelsestmt ::= SETUP_LOOP WHILE_START
            expr condjmp
            stmts_opt WHILE_END JUMP_ABSOLUTE
            WHILE_ELSE_START POP_TOP POP_BLOCK
            stmts WHILE_ELSE_END
    while1elsestmt ::= SETUP_LOOP _while1test WHILE1_START
            stmts_opt WHILE1_END JUMP_ABSOLUTE
            WHILE1_ELSE_START POP_TOP POP_BLOCK
            stmts WHILE1_ELSE_END
    while12elsestmt ::= SETUP_LOOP WHILE1_START
            _jump JUMP_IF_FALSE POP_TOP
            stmts_opt WHILE1_END JUMP_ABSOLUTE
            WHILE1_ELSE_START POP_TOP POP_BLOCK
            stmts WHILE1_ELSE_END
    _for ::= GET_ITER FOR_START FOR_ITER
    _for ::= LOAD_CONST FOR_LOOP
    forstmt ::= SETUP_LOOP expr _for designator
            stmts_opt FOR_END JUMP_ABSOLUTE
            FOR_ELSE_START POP_BLOCK FOR_ELSE_END
    forelsestmt ::= SETUP_LOOP expr _for designator
            stmts_opt FOR_END JUMP_ABSOLUTE
            FOR_ELSE_START POP_BLOCK stmts FOR_ELSE_END
    '''
def p_expr(self, args):
    # Expression grammar: loads, attribute access, binary and unary
    # operators, subscripts and slices, boolean and/or, (chained)
    # comparisons, dict displays and expression lists.
    # NOTE: the docstring is rule text in ``lhs ::= rhs`` form; keep it verbatim.
    '''
    expr ::= load_closure mklambda
    expr ::= mklambda
    expr ::= SET_LINENO
    expr ::= LOAD_FAST
    expr ::= LOAD_NAME
    expr ::= LOAD_CONST
    expr ::= LOAD_GLOBAL
    expr ::= LOAD_DEREF
    expr ::= LOAD_LOCALS
    expr ::= expr LOAD_ATTR
    expr ::= binary_expr
    expr ::= build_list
    binary_expr ::= expr expr binary_op
    binary_op ::= BINARY_ADD
    binary_op ::= BINARY_SUBTRACT
    binary_op ::= BINARY_MULTIPLY
    binary_op ::= BINARY_DIVIDE
    binary_op ::= BINARY_TRUE_DIVIDE
    binary_op ::= BINARY_FLOOR_DIVIDE
    binary_op ::= BINARY_MODULO
    binary_op ::= BINARY_LSHIFT
    binary_op ::= BINARY_RSHIFT
    binary_op ::= BINARY_AND
    binary_op ::= BINARY_OR
    binary_op ::= BINARY_XOR
    binary_op ::= BINARY_POWER
    expr ::= binary_subscr
    binary_subscr ::= expr expr BINARY_SUBSCR
    expr ::= expr expr DUP_TOPX_2 BINARY_SUBSCR
    expr ::= cmp
    expr ::= expr UNARY_POSITIVE
    expr ::= expr UNARY_NEGATIVE
    expr ::= expr UNARY_CONVERT
    expr ::= expr UNARY_INVERT
    expr ::= expr UNARY_NOT
    expr ::= mapexpr
    expr ::= expr SLICE+0
    expr ::= expr expr SLICE+1
    expr ::= expr expr SLICE+2
    expr ::= expr expr expr SLICE+3
    expr ::= expr DUP_TOP SLICE+0
    expr ::= expr expr DUP_TOPX_2 SLICE+1
    expr ::= expr expr DUP_TOPX_2 SLICE+2
    expr ::= expr expr expr DUP_TOPX_3 SLICE+3
    expr ::= and
    expr ::= and2
    expr ::= or
    or ::= expr JUMP_IF_TRUE POP_TOP LOGIC_TEST_START expr LOGIC_TEST_END
    and ::= expr JUMP_IF_FALSE POP_TOP LOGIC_TEST_START expr LOGIC_TEST_END
    and2 ::= _jump JUMP_IF_FALSE POP_TOP LOGIC_TEST_START expr LOGIC_TEST_END
    cmp ::= cmp_list
    cmp ::= compare
    compare ::= expr expr COMPARE_OP
    cmp_list ::= expr cmp_list1 ROT_TWO IF_ELSE_START POP_TOP
            IF_ELSE_END
    cmp_list1 ::= expr DUP_TOP ROT_THREE
            COMPARE_OP JUMP_IF_FALSE POP_TOP
            cmp_list1
    cmp_list1 ::= expr DUP_TOP ROT_THREE
            COMPARE_OP JUMP_IF_FALSE POP_TOP
            IF_THEN_START cmp_list1
    cmp_list1 ::= expr DUP_TOP ROT_THREE
            COMPARE_OP JUMP_IF_FALSE POP_TOP
            IF_THEN_START cmp_list2
    cmp_list1 ::= expr DUP_TOP ROT_THREE
            COMPARE_OP JUMP_IF_FALSE POP_TOP
            cmp_list2
    cmp_list2 ::= expr COMPARE_OP IF_THEN_END JUMP_FORWARD
    mapexpr ::= BUILD_MAP kvlist
    kvlist ::= kvlist kv
    kvlist ::= kvlist kv2
    kvlist ::=
    kv ::= DUP_TOP expr ROT_TWO expr STORE_SUBSCR
    kv2 ::= DUP_TOP expr expr ROT_THREE STORE_SUBSCR
    exprlist ::= exprlist expr
    exprlist ::= expr
    '''
def nonterminal(self, nt, args):
    """
    Build the node for nonterminal *nt* from its child nodes *args*.

    For the left-recursive list nonterminals ('stmts', 'exprlist',
    'kvlist') with more than one child, the second child is appended to
    the first child in place and that first child is returned, so the
    sequence is flattened instead of nested.  Every other case is
    delegated to GenericASTBuilder.nonterminal().
    """
    is_sequence = nt in ('stmts', 'exprlist', 'kvlist')
    if is_sequence and len(args) > 1:
        # Collect iterated items together: reuse the accumulated node.
        merged = args[0]
        merged.append(args[1])
        return merged
    return GenericASTBuilder.nonterminal(self, nt, args)
def __ambiguity(self, children):
    """
    Debugging aid: print the ambiguous *children*, then delegate to
    GenericASTBuilder.ambiguity() and return its result.
    """
    # only for debugging! to be removed hG/2000-10-15
    # print() with one parenthesised argument produces the same output
    # under the Python 2 print statement and the Python 3 print function.
    print(children)
    return GenericASTBuilder.ambiguity(self, children)
def resolve(self, list):
    """
    Pick one alternative from the ambiguous candidates in *list*.

    When exactly two candidates are offered and they are 'funcdef' and
    'assign', 'funcdef' wins.  Anything else is delegated to
    GenericASTBuilder.resolve().
    """
    preferred_pair = ('funcdef', 'assign')
    if len(list) == 2 and all(name in list for name in preferred_pair):
        return 'funcdef'
    return GenericASTBuilder.resolve(self, list)
def add_custom_rules(self, tokens, customize):
    """
    Add grammar rules for opcodes that take a variable number of
    arguments -- one new rule per customized opcode name:

        build_list  ::= {expr}^n BUILD_LIST_n
        build_list  ::= {expr}^n BUILD_TUPLE_n
        expr        ::= {expr}^n BUILD_SLICE_n
        unpack_list ::= UNPACK_LIST_n {designator}^n
        unpack      ::= UNPACK_TUPLE_n {designator}^n
        unpack      ::= UNPACK_SEQUENCE_n {designator}^n
        mklambda    ::= {expr}^n LOAD_LAMBDA MAKE_FUNCTION_n
        mkfunc      ::= {expr}^n LOAD_CONST MAKE_FUNCTION_n
        mklambda    ::= {expr}^n load_closure LOAD_LAMBDA MAKE_CLOSURE_n
        mkfunc      ::= {expr}^n load_closure LOAD_CONST MAKE_CLOSURE_n
        expr        ::= expr {expr}^na {kwarg}^nk {expr}^nak CALL_FUNCTION*_n

    tokens    -- not used by this method.
    customize -- mapping of customized opcode name (e.g. 'BUILD_LIST_3')
                 to its numeric argument.

    Names already recorded in self.customized are skipped so that the
    same rule is never added twice.  DUP_TOPX_n needs no rule.

    Raises ValueError for an opcode name that is not handled here.
    """
    # NOTE(review): nop_func is not defined in this block; presumably it
    # is provided at module level -- confirm.
    for k, v in customize.items():
        # avoid adding the same rule twice to this parser
        if k in self.customized:
            continue
        self.customized[k] = None

        # Strip the trailing '_<n>' to recover the base opcode name.
        op = k[:k.rfind('_')]
        if op in ('BUILD_LIST', 'BUILD_TUPLE'):
            rule = 'build_list ::= ' + 'expr '*v + k
        elif op == 'BUILD_SLICE':
            rule = 'expr ::= ' + 'expr '*v + k
        elif op in ('UNPACK_TUPLE', 'UNPACK_SEQUENCE'):
            rule = 'unpack ::= ' + k + ' designator'*v
        elif op == 'UNPACK_LIST':
            rule = 'unpack_list ::= ' + k + ' designator'*v
        elif op == 'DUP_TOPX':
            # no need to add a rule
            continue
        elif op == 'MAKE_FUNCTION':
            self.addRule('mklambda ::= %s LOAD_LAMBDA %s' %
                         ('expr '*v, k), nop_func)
            rule = 'mkfunc ::= %s LOAD_CONST %s' % ('expr '*v, k)
        elif op == 'MAKE_CLOSURE':
            self.addRule('mklambda ::= %s load_closure LOAD_LAMBDA %s' %
                         ('expr '*v, k), nop_func)
            rule = 'mkfunc ::= %s load_closure LOAD_CONST %s' % ('expr '*v, k)
        elif op in ('CALL_FUNCTION', 'CALL_FUNCTION_VAR',
                    'CALL_FUNCTION_VAR_KW', 'CALL_FUNCTION_KW'):
            na = (v & 0xff)           # positional parameters
            nk = (v >> 8) & 0xff      # keyword parameters
            # number of apply equiv arguments: one per '_VAR' / '_KW'
            # suffix.  Floor division keeps this an int so it can be
            # used as a repeat count on every Python version.
            nak = (len(op) - len('CALL_FUNCTION')) // 3
            rule = 'expr ::= expr ' + 'expr '*na + 'kwarg '*nk \
                   + 'expr ' * nak + k
        else:
            # String exceptions are not raisable; use a real exception.
            raise ValueError('unknown customize token %s' % k)
        self.addRule(rule, nop_func)
# Combines Python23Parser with PythonParserSingle; adds no members of its own.
class Python23ParserSingle(Python23Parser, PythonParserSingle):
    pass
@@ -502,6 +19,7 @@ if __name__ == '__main__':
# Check grammar
p = Python23Parser()
p.checkGrammar()
p.dumpGrammar()
# local variables:
# tab-width: 4

View File

@@ -36,7 +36,7 @@ class Python24Parser(Python25Parser):
gen_comp_body ::= expr YIELD_VALUE
'''
class Python24ParserSingle(Python25Parser, PythonParserSingle):
class Python24ParserSingle(Python24Parser, PythonParserSingle):
pass
if __name__ == '__main__':

View File

@@ -1,5 +1,9 @@
from uncompyle6.parser import PythonParserSingle
# Copyright (c) 2016 Rocky Bernstein
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
# Copyright (c) 2000-2002 by hartmut Goebel <hartmut@goebel.noris.de>
from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG
from uncompyle6.parser import PythonParserSingle
from uncompyle6.parsers.parse2 import Python2Parser
class Python27Parser(Python2Parser):

View File

@@ -22,11 +22,7 @@ from uncompyle6 import PYTHON3
from uncompyle6.scanners.tok import Token
# The byte code versions we support
if PYTHON3:
# Need to work out Python 2.3. ord's in PYTHON3
PYTHON_VERSIONS = (2.4, 2.5, 2.6, 2.7, 3.2, 3.3, 3.4, 3.5)
else:
PYTHON_VERSIONS = (2.3, 2.4, 2.5, 2.6, 2.7, 3.2, 3.3, 3.4, 3.5)
PYTHON_VERSIONS = (2.3, 2.4, 2.5, 2.6, 2.7, 3.2, 3.3, 3.4, 3.5)
# FIXME: DRY
if PYTHON3:

View File

@@ -1,712 +1,27 @@
# Copyright (c) 2016 by Rocky Bernstein
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
"""
Python 2.3 bytecode scanner
Python 2.3 bytecode scanner/deparser
This overlaps Python's 2.3's dis module, but it can be run from Python 3 and
other versions of Python. Also, we save token information for later
use in deparsing.
This overlaps Python 2.3's dis module, but it can be run from
Python 3 and other versions of Python. Also, we save token
information for later use in deparsing.
"""
from uncompyle6.scanners.scanner2 import Scanner2
from uncompyle6.scanner import L65536
class Scanner23(Scanner2):
def __init__(self, show_asm=None):
    """Initialise the base scanner for bytecode version 2.3, forwarding *show_asm*."""
    super(Scanner23, self).__init__(2.3, show_asm)
    # Python 2.7 has POP_JUMP_IF_{TRUE,FALSE}_OR_POP but earlier versions
    # do not; an empty set keeps the processing uniform.
    self.pop_jump_if_or_pop = frozenset()
def disassemble(self, co, code_objects=None, show_asm=None):
    """
    Disassemble a code object, returning a list of 'Token'.

    The main part of this procedure is modelled after
    dis.disassemble().

    co           -- code object to scan (co_code, co_consts, co_names,
                    co_varnames, co_cellvars and co_freevars are read).
    code_objects -- not used by this method.
    show_asm     -- not used by this method; self.show_asm decides
                    whether a listing is printed.

    Returns a (tokens, customize) pair.  customize maps each
    variable-argument opcode name that occurred, in 'OPNAME_<arg>' form,
    to its argument.  SET_LINENO instructions produce no token.
    """
    if self.show_asm in ('both', 'before'):
        from xdis.bytecode import Bytecode
        bytecode = Bytecode(co, self.opc)
        for instr in bytecode.get_instructions(co):
            print(instr._disassemble())

    # Container for tokens
    tokens = []
    customize = {}
    Token = self.Token  # shortcut

    self.code = co.co_code
    structures = self.find_structures(self.code)
    n = len(self.code)
    i = 0
    extended_arg = 0
    free = None
    while i < n:
        offset = i
        # Emit the structure markers recorded for this offset before the
        # instruction itself.
        if offset in structures:
            for j, elem in enumerate(structures[offset]):
                tokens.append(Token(elem, offset="%s_%d" % (offset, j)))

        c = self.code[i]
        op = ord(c)
        opname = self.opc.opname[op]
        i += 1
        oparg = None
        pattr = None
        if op >= self.opc.HAVE_ARGUMENT:
            # Two-byte little-endian argument, plus any pending EXTENDED_ARG.
            oparg = ord(self.code[i]) + ord(self.code[i+1]) * 256 + extended_arg
            extended_arg = 0
            i += 2
            if op == self.opc.EXTENDED_ARG:
                extended_arg = oparg * L65536
            if op in self.opc.hasconst:
                const = co.co_consts[oparg]
                # We can't use inspect.iscode() because we may be
                # using a different version of Python than the
                # one that this was byte-compiled on. So the code
                # types may mismatch.
                if hasattr(const, 'co_name'):
                    oparg = const
                    if const.co_name == '<lambda>':
                        assert opname == 'LOAD_CONST'
                        opname = 'LOAD_LAMBDA'
                    # verify uses 'pattr' for comparison, since 'attr'
                    # now holds Code(const) and thus can not be used
                    # for comparison (todo: think about changing this)
                    pattr = '<code_object ' + const.co_name + '>'
                else:
                    pattr = const
            elif op in self.opc.hasname:
                pattr = co.co_names[oparg]
            elif op in self.opc.hasjrel:
                pattr = repr(i + oparg)
            elif op in self.opc.hasjabs:
                pattr = repr(oparg)
            elif op in self.opc.haslocal:
                pattr = co.co_varnames[oparg]
            elif op in self.opc.hascompare:
                pattr = self.opc.cmp_op[oparg]
            elif op in self.opc.hasfree:
                if free is None:
                    free = co.co_cellvars + co.co_freevars
                pattr = free[oparg]

        if opname == 'SET_LINENO':
            continue
        elif opname in ('BUILD_LIST', 'BUILD_TUPLE', 'BUILD_SLICE',
                        'UNPACK_LIST', 'UNPACK_TUPLE', 'UNPACK_SEQUENCE',
                        'MAKE_FUNCTION', 'CALL_FUNCTION', 'MAKE_CLOSURE',
                        'CALL_FUNCTION_VAR', 'CALL_FUNCTION_KW',
                        'CALL_FUNCTION_VAR_KW', 'DUP_TOPX',
                        ):
            # Fold the argument into the name so the parser can get one
            # grammar rule per arity.
            opname = '%s_%d' % (opname, oparg)
            customize[opname] = oparg

        tokens.append(Token(opname, oparg, pattr, offset))

    if self.show_asm:
        for t in tokens:
            print(t)
        print()
    return tokens, customize
def __get_target(self, code, pos, op=None):
    """
    Return the jump destination encoded by the instruction at *pos*.

    The two bytes after the opcode are read as a little-endian argument.
    For opcodes listed in self.opc.hasjrel the argument is relative to
    the next instruction (pos + 3); otherwise it is returned unchanged.
    *op* may be passed to avoid re-reading the opcode at *pos*.
    """
    if op is None:
        op = ord(code[pos])
    target = ord(code[pos+1]) + ord(code[pos+2]) * 256
    # Was `self.self.opc`, which raises AttributeError.
    if op in self.opc.hasjrel:
        target += pos + 3
    return target
def __first_instr(self, code, start, end, instr, target=None, exact=True):
    """
    Find the first <instr> in the block from start to end.
    <instr> is any python bytecode instruction or a list of opcodes
    If <instr> is an opcode with a target (like a jump), a target
    destination can be specified which must match precisely if exact
    is True, or if exact is False, the instruction which has a target
    closest to <target> will be returned.

    Return index to it or None if not found.
    """
    assert start >= 0 and end < len(code)

    # Was `self.self.opc`, which raises AttributeError.
    HAVE_ARGUMENT = self.opc.HAVE_ARGUMENT

    # Accept a single opcode as well as a sequence of opcodes.
    try:
        instr[0]
    except (TypeError, IndexError):
        instr = [instr]

    pos = None
    distance = len(code)
    i = start
    while i < end:
        op = ord(code[i])
        if op in instr:
            if target is None:
                return i
            dest = self.__get_target(code, i, op)
            if dest == target:
                return i
            elif not exact:
                # Remember the candidate whose destination is nearest.
                _distance = abs(target - dest)
                if _distance < distance:
                    distance = _distance
                    pos = i
        # Instructions are 1 byte, or 3 bytes when they carry an argument.
        if op < HAVE_ARGUMENT:
            i += 1
        else:
            i += 3
    return pos
def __last_instr(self, code, start, end, instr, target=None, exact=True):
    """
    Find the last <instr> in the block from start to end.
    <instr> is any python bytecode instruction or a list of opcodes
    If <instr> is an opcode with a target (like a jump), a target
    destination can be specified which must match precisely if exact
    is True, or if exact is False, the instruction which has a target
    closest to <target> will be returned.

    Return index to it or None if not found.
    """
    assert start >= 0 and end < len(code)

    # Was `self.self.opc`, which raises AttributeError.
    HAVE_ARGUMENT = self.opc.HAVE_ARGUMENT

    # Accept a single opcode as well as a sequence of opcodes.
    try:
        instr[0]
    except (TypeError, IndexError):
        instr = [instr]

    pos = None
    distance = len(code)
    i = start
    while i < end:
        op = ord(code[i])
        if op in instr:
            if target is None:
                pos = i
            else:
                dest = self.__get_target(code, i, op)
                if dest == target:
                    distance = 0
                    pos = i
                elif not exact:
                    # `<=` so that, among equally near candidates, the
                    # later instruction wins.
                    _distance = abs(target - dest)
                    if _distance <= distance:
                        distance = _distance
                        pos = i
        # Instructions are 1 byte, or 3 bytes when they carry an argument.
        if op < HAVE_ARGUMENT:
            i += 1
        else:
            i += 3
    return pos
def __all_instr(self, code, start, end, instr, target=None):
    """
    Find all <instr> in the block from start to end.
    <instr> is any python bytecode instruction or a list of opcodes
    If <instr> is an opcode with a target (like a jump), a target
    destination can be specified which must match precisely.

    Return a list with indexes to them or [] if none found.
    """
    assert start >= 0 and end < len(code)

    # Was `self.self.opc`, which raises AttributeError.
    HAVE_ARGUMENT = self.opc.HAVE_ARGUMENT

    # Accept a single opcode as well as a sequence of opcodes.
    try:
        instr[0]
    except (TypeError, IndexError):
        instr = [instr]

    result = []
    i = start
    while i < end:
        op = ord(code[i])
        if op in instr:
            if target is None:
                result.append(i)
            elif target == self.__get_target(code, i, op):
                result.append(i)
        # Instructions are 1 byte, or 3 bytes when they carry an argument.
        if op < HAVE_ARGUMENT:
            i += 1
        else:
            i += 3
    return result
def __next_except_jump(self, code, start, end, target):
    """
    Return the next jump that was generated by an except SomeException:
    construct in a try...except...else clause or None if not found.

    Candidate jumps to *target* are searched between *start* and *end*.
    A candidate is accepted when it sits at end-3, or when the
    argument-less opcodes following it match one of the patterns
    checked below.
    """
    HAVE_ARGUMENT = self.opc.HAVE_ARGUMENT
    JUMP_FORWARD = self.opc.opmap['JUMP_FORWARD']
    JUMP_ABSOLUTE = self.opc.opmap['JUMP_ABSOLUTE']
    END_FINALLY = self.opc.opmap['END_FINALLY']
    POP_TOP = self.opc.opmap['POP_TOP']
    DUP_TOP = self.opc.opmap['DUP_TOP']
    # SET_LINENO is optional in the opcode map; only a missing key is
    # tolerated (a bare except would also hide unrelated errors).
    try:
        SET_LINENO = self.opc.opmap['SET_LINENO']
    except KeyError:
        SET_LINENO = None

    lookup = [JUMP_ABSOLUTE, JUMP_FORWARD]
    while start < end:
        jmp = self.__first_instr(code, start, end, lookup, target)
        if jmp is None:
            return None
        if jmp == end-3:
            return jmp
        # Collect up to four argument-less opcodes after the jump,
        # skipping SET_LINENO and stopping at any other opcode that
        # carries an argument.
        ops = [None, None, None, None]
        opp = [0, 0, 0, 0]
        pos = 0
        x = jmp+3
        while x <= end and pos < 4:
            op = ord(code[x])
            if op == SET_LINENO:
                x += 3
                continue
            elif op >= HAVE_ARGUMENT:
                break
            ops[pos] = op
            opp[pos] = x
            pos += 1
            x += 1
        if ops[0] == POP_TOP and ops[1] == END_FINALLY and opp[1] == end:
            return jmp
        if ops[0] == POP_TOP and ops[1] == DUP_TOP:
            return jmp
        if ops[0] == ops[1] == ops[2] == ops[3] == POP_TOP:
            return jmp
        start = jmp + 3
    return None
def __list_comprehension(self, code, pos, op=None):
    """
    Determine if there is a list comprehension structure starting at pos.

    The pattern checked is BUILD_LIST with argument 0 immediately
    followed by DUP_TOP and LOAD_ATTR.  Returns 0 when the opcode at
    *pos* is not BUILD_LIST or the code ends too early, otherwise the
    boolean result of the pattern comparison.
    """
    opmap = self.opc.opmap
    build_list = opmap['BUILD_LIST']
    dup_top = opmap['DUP_TOP']
    load_attr = opmap['LOAD_ATTR']

    opcode = ord(code[pos]) if op is None else op
    if opcode != build_list:
        return 0
    try:
        low, high, third, fourth = [ord(code[pos + k]) for k in (1, 2, 3, 4)]
    except IndexError:
        return 0
    found = (opcode, low + high*256, third, fourth)
    return found == (build_list, 0, dup_top, load_attr)
def __ignore_if(self, code, pos):
    """
    Return true (1) if the conditional jump at *pos* is to be ignored,
    otherwise 0.

    A jump is ignored when it was already recorded in the ignored-ifs
    list, or when it is the test of an exception match: a COMPARE_OP
    with the 'exception match' argument right before it and four POP_TOP
    opcodes right after it.
    """
    pop_top = self.opc.opmap['POP_TOP']
    compare_op = self.opc.opmap['COMPARE_OP']
    except_match = self.opc.copmap['exception match']

    # If that was added by a while loop
    if pos in self.__ignored_ifs:
        return 1

    # Check if we can test only for POP_TOP for this -Dan
    # Maybe need to be done as above (skip SET_LINENO's)
    if ord(code[pos-3]) != compare_op:
        return 0
    if ord(code[pos-2]) + ord(code[pos-1])*256 != except_match:
        return 0
    for delta in (3, 4, 5, 6):
        if ord(code[pos+delta]) != pop_top:
            return 0
    return 1  # Exception match
def __fix_parent(self, code, target, parent):
    """
    Fix parent boundaries if needed.

    When *target* is a recorded loop entry that lies before the parent's
    start, and the parent ends with a JUMP_ABSOLUTE to that same target,
    parent['end'] is moved back by 3 so that the trailing jump is
    excluded.  *parent* is modified in place; nothing is returned.
    """
    JUMP_ABSOLUTE = self.opc.opmap['JUMP_ABSOLUTE']
    start = parent['start']
    end = parent['end']

    # Map the second start point for 'while 1:' in python 2.3+ to start.
    # A plain lookup with a fallback replaces the bare except, which
    # would also have hidden unrelated errors.
    target = self.__while1.get(target, target)
    if target >= start or end-start < 3 or target not in self.__loops:
        return
    if ord(code[end-3]) == JUMP_ABSOLUTE:
        cont_target = self.__get_target(code, end-3, JUMP_ABSOLUTE)
        if target == cont_target:
            parent['end'] = end-3
def __restrict_to_parent(self, target, parent):
    """
    Restrict *target* to the parent's boundaries.

    Returns *target* when it lies strictly between parent['start'] and
    parent['end'], otherwise parent['end'].
    """
    if parent['start'] < target < parent['end']:
        return target
    return parent['end']
def __detect_structure(self, code, pos, op=None):
    """
    Detect structures and their boundaries to fix optimized jumps
    in python2.3+

    Looks at the single instruction at *pos* (opcode *op*, read from
    *code* when not given) and, depending on the opcode, appends
    {'type', 'start', 'end'} records to self.__structs and records
    corrected jump destinations in self.__fixed_jumps.  Returns None.
    """
    # NOTE(review): the nesting below was reconstructed from a listing
    # whose indentation was lost -- confirm against the original file.

    # TODO: check the struct boundaries more precisely -Dan

    SETUP_LOOP = self.opc.opmap['SETUP_LOOP']
    FOR_ITER = self.opc.opmap['FOR_ITER']
    GET_ITER = self.opc.opmap['GET_ITER']
    SETUP_EXCEPT = self.opc.opmap['SETUP_EXCEPT']
    JUMP_FORWARD = self.opc.opmap['JUMP_FORWARD']
    JUMP_ABSOLUTE = self.opc.opmap['JUMP_ABSOLUTE']
    JUMP_IF_FALSE = self.opc.opmap['JUMP_IF_FALSE']
    JUMP_IF_TRUE = self.opc.opmap['JUMP_IF_TRUE']
    END_FINALLY = self.opc.opmap['END_FINALLY']
    POP_TOP = self.opc.opmap['POP_TOP']
    POP_BLOCK = self.opc.opmap['POP_BLOCK']
    # SET_LINENO may be absent from the opcode map; None then never
    # matches an opcode byte.
    try: SET_LINENO = self.opc.opmap['SET_LINENO']
    except: SET_LINENO = None

    # Ev remove this test and make op a mandatory argument -Dan
    if op is None:
        op = ord(code[pos])

    ## Detect parent structure
    # Start from the root record and narrow to the innermost recorded
    # structure that contains pos.
    parent = self.__structs[0]
    start = parent['start']
    end = parent['end']
    for s in self.__structs:
        if s['type'] == 'LOGIC_TEST':
            continue ## logic tests are not structure containers
        _start = s['start']
        _end = s['end']
        if (_start <= pos < _end) and (_start >= start and _end < end):
            start = _start
            end = _end
            parent = s

    ## We need to know how many new structures were added in this run
    # (the value is not read again within this method)
    origStructCount = len(self.__structs)

    if op == SETUP_LOOP:
        start = pos+3
        # this is for python2.2. Maybe we can optimize and not call this for 2.3+ -Dan
        while ord(code[start]) == SET_LINENO:
            start += 3
        start_op = ord(code[start])
        while1 = False
        if start_op in (JUMP_FORWARD, JUMP_ABSOLUTE):
            ## This is a while 1 (has a particular structure)
            start = self.__get_target(code, start, start_op)
            start = self.__restrict_to_parent(start, parent)
            self.__while1[pos+3] = start ## map between the 2 start points
            while1 = True
            if start_op == JUMP_ABSOLUTE and ord(code[pos+6])==JUMP_IF_FALSE:
                # special `while 1: pass` in python2.3
                self.__fixed_jumps[pos+3] = start
        target = self.__get_target(code, pos, op)
        end = self.__restrict_to_parent(target, parent)
        if target != end:
            self.__fixed_jumps[pos] = end
        # The jump back to the loop start marks the end of the loop body.
        jump_back = self.__last_instr(code, start, end, JUMP_ABSOLUTE,
                                      start, False)
        assert(jump_back is not None)
        target = self.__get_target(code, jump_back, JUMP_ABSOLUTE)
        i = target
        while i < jump_back and ord(code[i])==SET_LINENO:
            i += 3
        if ord(code[i]) in (FOR_ITER, GET_ITER):
            loop_type = 'FOR'
        else:
            lookup = [JUMP_IF_FALSE, JUMP_IF_TRUE]
            test = self.__first_instr(code, pos+3, jump_back, lookup, jump_back+3)
            if test is None:
                # this is a special while 1 structure in python 2.4
                while1 = True
            else:
                #assert(test is not None)
                test_target = self.__get_target(code, test)
                test_target = self.__restrict_to_parent(test_target, parent)
                next = (ord(code[test_target]), ord(code[test_target+1]))
                if next == (POP_TOP, POP_BLOCK):
                    self.__ignored_ifs.append(test)
                else:
                    while1 = True
            if while1 == True:
                loop_type = 'WHILE1'
            else:
                loop_type = 'WHILE'
        self.__loops.append(target)
        self.__structs.append({'type': loop_type,
                               'start': target,
                               'end': jump_back})
        self.__structs.append({'type': loop_type + '_ELSE',
                               'start': jump_back+3,
                               'end': end})
    elif self.__list_comprehension(code, pos, op):
        get_iter = self.__first_instr(code, pos+7, end, GET_ITER)
        for_iter = self.__first_instr(code, get_iter, end, FOR_ITER)
        assert(get_iter is not None and for_iter is not None)
        start = get_iter+1
        target = self.__get_target(code, for_iter, FOR_ITER)
        end = self.__restrict_to_parent(target, parent)
        jump_back = self.__last_instr(code, start, end, JUMP_ABSOLUTE,
                                      start, False)
        assert(jump_back is not None)
        target = self.__get_target(code, jump_back, JUMP_ABSOLUTE)
        start = self.__restrict_to_parent(target, parent)
        self.__structs.append({'type': 'LIST_COMPREHENSION',
                               'start': start,
                               'end': jump_back})
    elif op == SETUP_EXCEPT:
        start = pos+3
        target = self.__get_target(code, pos, op)
        # this should be redundant as it can't be out of boundaries -Dan
        # check if it can be removed
        end = self.__restrict_to_parent(target, parent)
        if target != end:
            #print "!!!!found except target != end: %s %s" % (target, end)
            self.__fixed_jumps[pos] = end
        ## Add the try block
        self.__structs.append({'type': 'TRY',
                               'start': start,
                               'end': end-4})
        ## Now isolate the except and else blocks
        start = end
        target = self.__get_target(code, start-3)
        #self.__fix_parent(code, target, parent)
        try_else_start = target
        end = self.__restrict_to_parent(target, parent)
        if target != end:
            self.__fixed_jumps[start-3] = end

        end_finally = self.__last_instr(code, start, end, END_FINALLY)
        assert(end_finally is not None)
        lookup = [JUMP_ABSOLUTE, JUMP_FORWARD]
        jump_end = self.__last_instr(code, start, end_finally, lookup)
        assert(jump_end is not None)

        target = self.__get_target(code, jump_end)
        if target == try_else_start:
            end = end_finally+1
        else:
            end = self.__restrict_to_parent(target, parent)
            if target != end:
                self.__fixed_jumps[jump_end] = end

        ## Add the try-else block
        self.__structs.append({'type': 'TRY_ELSE',
                               'start': end_finally+1,
                               'end': end})
        ## Add the except blocks
        i = start
        while i < end_finally:
            jmp = self.__next_except_jump(code, i, end_finally, target)
            if jmp is None:
                break
            if i!=start and ord(code[i])==POP_TOP:
                pos = i + 1
            else:
                pos = i
            self.__structs.append({'type': 'EXCEPT',
                                   'start': pos,
                                   'end': jmp})
            if target != end:
                self.__fixed_jumps[jmp] = end
            i = jmp+3
    elif op == JUMP_ABSOLUTE:
        ## detect if we have a 'foo and bar and baz...' structure
        ## that was optimized (thus the presence of JUMP_ABSOLUTE)

        return # no longer needed. just return. remove this elif later -Dan

        # Everything below in this branch is unreachable because of the
        # return above.
        if pos in self.__fixed_jumps:
            return ## Already marked
        if parent['end'] - pos < 7:
            return
        next = (ord(code[pos+3]), ord(code[pos+6]))
        if next != (JUMP_IF_FALSE, POP_TOP):
            return
        end = self.__get_target(code, pos+3)
        ifs = self.__all_instr(code, pos, end, JUMP_IF_FALSE, end)

        ## Test if all JUMP_IF_FALSE we have found belong to the
        ## structure (may not be needed but it doesn't hurt)
        count = len(ifs)
        if count < 2:
            return
        for jif in ifs[1:]:
            before = ord(code[jif-3])
            after = ord(code[jif+3])
            if (before not in (JUMP_FORWARD, JUMP_ABSOLUTE) or
                after != POP_TOP):
                return

        ## All tests passed. Perform fixes
        self.__ignored_ifs.extend(ifs)
        for i in range(count-1):
            self.__fixed_jumps[ifs[i]-3] = ifs[i+1]-3
    elif op in (JUMP_IF_FALSE, JUMP_IF_TRUE):
        if self.__ignore_if(code, pos):
            return
        start = pos+4 ## JUMP_IF_FALSE/TRUE + POP_TOP
        target = self.__get_target(code, pos, op)
        if parent['start'] <= target <= parent['end']:
            # A jump right before the target means there is an else part;
            # otherwise the test is recorded as a LOGIC_TEST.
            if ord(code[target-3]) in (JUMP_ABSOLUTE, JUMP_FORWARD):
                if_end = self.__get_target(code, target-3)
                #self.__fix_parent(code, if_end, parent)
                end = self.__restrict_to_parent(if_end, parent)
                if ord(code[end-3]) == JUMP_ABSOLUTE:
                    else_end = self.__get_target(code, end-3)
                    if if_end == else_end and if_end in self.__loops:
                        end -= 3 ## skip the continue instruction
                if if_end != end:
                    self.__fixed_jumps[target-3] = end
                self.__structs.append({'type': 'IF_THEN',
                                       'start': start,
                                       'end': target-3})
                self.__structs.append({'type': 'IF_ELSE',
                                       'start': target+1,
                                       'end': end})
            else:
                self.__structs.append({'type': 'LOGIC_TEST',
                                       'start': start,
                                       'end': target})
def find_jump_targets(self, code):
    """
    Detect all offsets in a byte code which are jump targets.

    Returns a dict mapping each target offset to the number of jumps
    that lead to it.  Only relative jumps, and jumps that have an entry
    in the fixed-jumps table, are counted; absolute jumps are not
    handled yet.

    This procedure is modelled after self.opc.findlables(), but here
    for each target the number of jumps are counted.
    """
    opc = self.opc
    have_argument = opc.HAVE_ARGUMENT
    relative_jumps = opc.hasjrel
    absolute_jumps = opc.hasjabs
    fix_jumps = self.__pyversion >= 2.3

    size = len(code)
    # Reset the per-scan bookkeeping used by structure detection.
    self.__structs = [{'type': 'root', 'start': 0, 'end': size - 1}]
    self.__loops = []          # All loop entry points
    self.__while1 = {}         # 'while 1:' in python 2.3+ has another start point
    self.__fixed_jumps = {}    # Map fixed jumps to their real destination
    self.__ignored_ifs = []    # JUMP_IF_XXXX's we should ignore

    counts = {}
    offset = 0
    while offset < size:
        op = ord(code[offset])

        if fix_jumps:
            # Determine structures and fix jumps for 2.3+
            self.__detect_structure(code, offset, op)

        if op < have_argument:
            offset += 1
            continue

        label = self.__fixed_jumps.get(offset)
        if label is None:
            oparg = ord(code[offset + 1]) + ord(code[offset + 2]) * 256
            if op in relative_jumps:
                label = offset + 3 + oparg
            elif op in absolute_jumps:
                # todo: absolute jumps
                pass
        if label is not None:
            counts[label] = counts.get(label, 0) + 1
        offset += 3
    return counts
def find_structures(self, code):
    """
    Detect all structures in a byte code.

    Return a mapping from offset to a list of keywords that should
    be inserted at that position: "<TYPE>_START" names at the start
    offset of each recorded structure, "<TYPE>_END" names at its end
    offset.  The 'root' structure contributes nothing.
    """
    have_argument = self.opc.HAVE_ARGUMENT
    size = len(code)

    # Reset the per-scan bookkeeping.
    self.__structs = [{'type': 'root', 'start': 0, 'end': size - 1}]
    self.__loops = []          # All loop entry points
    self.__while1 = {}         # 'while 1:' in python 2.3+ has another start point
    self.__fixed_jumps = {}    # Map fixed jumps to their real destination
    self.__ignored_ifs = []    # JUMP_IF_XXXX's we should ignore

    # NOTE(review): this loop only steps over the instructions; nothing
    # here records a structure, so as written the result is always
    # empty.  Presumably a structure-detection call belongs in this
    # loop -- confirm.
    offset = 0
    while offset < size:
        op = ord(code[offset])
        offset += 3 if op >= have_argument else 1

    starts = {}
    ends = {}
    for struct in self.__structs:
        kind = struct['type']
        first = struct['start']
        last = struct['end']
        if kind == 'root':
            continue
        # startpoints of the outer structures must come first
        # endpoints of the inner structures must come first
        starts.setdefault(first, []).append("%s_START" % kind)
        ends.setdefault(last, []).insert(0, "%s_END" % kind)
    for where, names in ends.items():
        starts.setdefault(where, []).extend(names)
    return starts
# __scanners = {}
# def getscanner(version):
# if not __scanners.has_key(version):
# __scanners[version] = Scanner(version)
# return __scanners[version]
if __name__ == "__main__":
    # Demo: tokenize this module's own code object and print each token.
    # It only runs under Python 2.3 itself.
    from uncompyle6 import PYTHON_VERSION
    if PYTHON_VERSION != 2.3:
        print("Need to be Python 2.3 to demo; I am %s." %
              PYTHON_VERSION)
    else:
        import inspect
        co = inspect.currentframe().f_code
        tokens, customize = Scanner23().disassemble(co)
        for tok in tokens:
            print(tok.format())
# local variables:
# tab-width: 4
import uncompyle6.scanners.scanner24 as scan
# bytecode verification, verify(), uses JUMP_OPs from here
from xdis.opcodes import opcode_23
JUMP_OPs = opcode_23.JUMP_OPs
# We base this off of 2.4 instead of the other way around
# because we cleaned things up this way.
# The history is that 2.7 support is the cleanest,
# then from that we got 2.6 and so on.
class Scanner23(scan.Scanner24):
    """Scanner for Python 2.3 bytecode, built on top of Scanner24."""

    def __init__(self, show_asm=None):
        """
        Initialise as a Scanner24 and then override the version-specific
        attributes.

        show_asm -- forwarded unchanged to Scanner24.__init__().  It
                    defaults to None so that ``Scanner23()`` can still
                    be called without arguments.
        """
        scan.Scanner24.__init__(self, show_asm)
        # These are the only differences in initialization between
        # 2.3-2.6
        self.version = 2.3
        self.genexpr_name = '<generator expression>'