Merge branch 'master' of github.com:rocky/python-uncompyle6 into ast-format

This commit is contained in:
rocky
2016-06-03 13:45:16 -04:00
28 changed files with 1458 additions and 231 deletions

2
.gitignore vendored
View File

@@ -1,3 +1,4 @@
*.pyc
*_dis
*~
*.pyc
@@ -9,6 +10,7 @@
/__pkginfo__.pyc
/dist
/how-to-make-a-release.txt
/nose-*.egg
/tmp
/uncompyle6.egg-info
__pycache__

View File

@@ -84,14 +84,18 @@ for usage help.
Known Bugs/Restrictions
-----------------------
Python 2 deparsing decompiles about the first 140 or so of the Python
2.7.10 and 2.7.11 standard library files and all but less that 10%
verify. So as such, it is probably a little better than uncompyle2.
Other Python 2 versions do worse.
Python 2 deparsing decompiles all of the Python 2.7.10 and 2.7.11
installed packages I have on my system, and more than 90% of them
verify ok. Some of the verification failures may be bugs in the
verification process. As such, it is probably a little better than
uncompyle2. Other Python 2 versions do worse.
Python 3 deparsing before 3.5 is okay, but even there, more work is needed to
decompile all of its library. Python 3.5 is missing some of new
opcodes and idioms added, but it still often works.
More than 90% of the Python 3.3 and 3.4 packages that I have
installed on my system deparse. Python 3.2 fares a little worse, at
just under 90%. (Each Python version has about 200 bytecode files.)
All of the bytecode that deparses also verifies. Python 3.5 is more
problematic and is missing some of the new opcodes and idioms added.
But it still often works.
There is lots to do, so please dig in and help.

View File

@@ -37,7 +37,7 @@ entry_points={
]}
ftp_url = None
install_requires = ['spark-parser >= 1.2.1',
'xdis >= 1.1.0']
'xdis >= 1.1.1']
license = 'MIT'
mailing_list = 'python-debugger@googlegroups.com'
modname = 'uncompyle6'

View File

@@ -20,7 +20,7 @@ check:
$(MAKE) check-$$PYTHON_VERSION
#: Run working tests from Python 2.6 or 2.7
check-2.6 check-2.7: check-bytecode check-2.7-ok
check-2.6 check-2.7: check-bytecode-2 check-bytecode-3 check-2.7-ok
#: Run working tests from Python 3.2
check-3.2: check-bytecode
@@ -41,14 +41,21 @@ check-3.4: check-bytecode check-3.4-ok check-2.7-ok
check-disasm:
$(PYTHON) dis-compare.py
#: Check deparsing bytecode only
#: Check deparsing bytecode 2.x only
check-bytecode-2:
$(PYTHON) test_pythonlib.py --bytecode-2.3 --bytecode-2.5 --bytecode-2.6 --bytecode-2.7
#: Check deparsing bytecode 3.x only
check-bytecode-3:
$(PYTHON) test_pythonlib.py --bytecode-3.2 --bytecode-3.3 --bytecode-3.4 --bytecode-3.5
#: Check deparsing bytecode that works running Python 2 and Python 3
check-bytecode: check-bytecode-3
$(PYTHON) test_pythonlib.py --bytecode-2.5 --bytecode-2.6 --bytecode-2.7
#: Check deparsing bytecode only
check-bytecode:
$(PYTHON) test_pythonlib.py --bytecode-2.5 --bytecode-2.6 --bytecode-2.7 \
--bytecode-3.2 --bytecode-3.3 --bytecode-3.4 --bytecode-3.5
#: Check deparsing Python 2.3
check-bytecode-2.3:
$(PYTHON) test_pythonlib.py --bytecode-2.3
#: Check deparsing Python 2.5
check-bytecode-2.5:

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,18 @@
# Bug from python2.6/SimpleXMLRPCServer.py
# The problem in 2.6 is handling
# 72 JUMP_ABSOLUTE 17 (to 17)
# 75 POP_TOP
# 76 JUMP_ABSOLUTE 17 (to 17)
# And getting:
# list_for ::= expr _for designator list_iter JUMP_BACK
# list_iter ::= list_if JUMP_BACK
# ^^^^^^^^^ added to 2.6 grammar
# list_iter ::= list_for
def list_public_methods(obj):
    # Test input for the decompiler: a list comprehension whose filter
    # is an `and` of two conditions.  NOTE(review): presumably the exact
    # shape of this expression is what produces the bytecode described
    # in the header comment, so it should not be rewritten.
    return [member for member in dir(obj)
            if not member.startswith('_') and
            hasattr(getattr(obj, member), '__call__')]

View File

@@ -27,7 +27,7 @@ from fnmatch import fnmatch
#----- configure this for your needs
TEST_VERSIONS=('2.6.9', '2.7.10', '2.7.11', '3.2.6', '3.3.5', '3.4.2')
TEST_VERSIONS=('2.3.7', '2.6.9', '2.7.10', '2.7.11', '3.2.6', '3.3.5', '3.4.2')
target_base = '/tmp/py-dis/'
lib_prefix = os.path.join(os.environ['HOME'], '.pyenv/versions')

View File

@@ -78,7 +78,7 @@ for vers in (2.7, 3.4, 3.5):
test_options[key] = (os.path.join(src_dir, pythonlib), PYOC, key, vers)
pass
for vers in (2.5, 2.6, 2.7, 3.2, 3.3, 3.4, 3.5):
for vers in (2.3, 2.5, 2.6, 2.7, 3.2, 3.3, 3.4, 3.5):
bytecode = "bytecode_%s" % vers
key = "bytecode-%s" % vers
test_options[key] = (bytecode, PYC, bytecode, vers)

View File

@@ -463,11 +463,27 @@ def get_python_parser(version, debug_parser, compile_mode='exec'):
# FIXME: there has to be a better way...
if version < 3.0:
if version == 2.3:
import uncompyle6.parsers.parse23 as parse23
if compile_mode == 'exec':
p = parse23.Python23Parser(debug_parser)
else:
p = parse23.Python23ParserSingle(debug_parser)
elif version == 2.6:
import uncompyle6.parsers.parse26 as parse26
if compile_mode == 'exec':
p = parse26.Python26Parser(debug_parser)
else:
p = parse26.Python26ParserSingle(debug_parser)
else:
import uncompyle6.parsers.parse2 as parse2
if compile_mode == 'exec':
p = parse2.Python2Parser(debug_parser)
else:
p = parse2.Python2ParserSingle(debug_parser)
pass
pass
pass
else:
import uncompyle6.parsers.parse3 as parse3
if version == 3.2:

View File

@@ -1,9 +1,6 @@
# Copyright (c) 1999 John Aycock
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
# Copyright (c) 2015 Rocky Bernstein
#
# See LICENSE for license
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
# Copyright (c) 1999 John Aycock
"""
A spark grammar for Python 2.x.
@@ -20,14 +17,10 @@ from __future__ import print_function
from uncompyle6.parser import PythonParser, PythonParserSingle, nop_func
from uncompyle6.parsers.astnode import AST
from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG
from uncompyle6 import PYTHON3
class Python2Parser(PythonParser):
def __init__(self, debug_parser=PARSER_DEFAULT_DEBUG):
if PYTHON3:
super().__init__(AST, 'stmts', debug=debug_parser)
else:
super(Python2Parser, self).__init__(AST, 'stmts', debug=debug_parser)
self.customized = {}

View File

@@ -0,0 +1,536 @@
# Copyright (c) 2016 Rocky Bernstein
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
# Copyright (c) 2000-2002 by hartmut Goebel <hartmut@goebel.noris.de>
# Copyright (c) 1999 John Aycock
import string
from spark_parser import GenericASTBuilder, DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG
from uncompyle6.parsers.astnode import AST
from uncompyle6.parser import PythonParserSingle, ParserError, nop_func
class Python23Parser(GenericASTBuilder):
def __init__(self, debug_parser=PARSER_DEFAULT_DEBUG):
    """Initialize the underlying GenericASTBuilder.

    AST and 'stmts' are passed as the first two builder arguments
    (presumably the node class and the grammar start symbol -- confirm
    against spark_parser); debug_parser is forwarded as ``debug``.
    """
    GenericASTBuilder.__init__(self, AST, 'stmts', debug=debug_parser)
    # Keys of the customize dict for which add_custom_rules() has
    # already added a rule.
    self.customized = {}
def cleanup(self):
    """
    Break reference cycles so the garbage collector can reclaim
    this parser.

    Every value in the rule tables is overwritten with None, and then
    every attribute name reported by dir(self) is set to None as well.
    """
    rule_tables = (self.rule2func, self.rules, self.rule2name, self.first)
    for table in rule_tables:
        # Only values are replaced; the set of keys is left unchanged.
        for key in table.keys():
            table[key] = None
    for attr_name in dir(self):
        setattr(self, attr_name, None)
def error(self, token):
    """Raise ParserError for *token*, passing along token.offset.

    NOTE(review): presumably invoked by the spark parser when it hits
    a token it cannot parse -- confirm against spark_parser.
    """
    raise ParserError(token, token.offset)
def typestring(self, token):
    """Return the ``type`` attribute of *token*.

    NOTE(review): presumably the hook the parser uses to map a token
    to its grammar symbol name -- confirm against spark_parser.
    """
    return token.type
def p_funcdef(self, args):
'''
stmt ::= funcdef
funcdef ::= mkfunc designator
load_closure ::= load_closure LOAD_CLOSURE
load_closure ::= LOAD_CLOSURE
'''
def p_list_comprehension(self, args):
'''
expr ::= list_compr
list_compr ::= BUILD_LIST_0 DUP_TOP _load_attr
designator list_iter del_stmt
list_iter ::= list_for
list_iter ::= list_if
list_iter ::= lc_body
_load_attr ::= LOAD_ATTR
_load_attr ::=
_lcfor ::= GET_ITER LIST_COMPREHENSION_START FOR_ITER
_lcfor ::= LOAD_CONST FOR_LOOP
_lcfor2 ::= GET_ITER FOR_ITER
_lcfor2 ::= LOAD_CONST FOR_LOOP
list_for ::= expr _lcfor designator list_iter
LIST_COMPREHENSION_END JUMP_ABSOLUTE
list_for ::= expr _lcfor2 designator list_iter
JUMP_ABSOLUTE
list_if ::= expr condjmp IF_THEN_START list_iter
IF_THEN_END _jump POP_TOP IF_ELSE_START IF_ELSE_END
lc_body ::= LOAD_NAME expr CALL_FUNCTION_1 POP_TOP
lc_body ::= LOAD_FAST expr CALL_FUNCTION_1 POP_TOP
lc_body ::= LOAD_NAME expr LIST_APPEND
lc_body ::= LOAD_FAST expr LIST_APPEND
'''
def p_augmented_assign(self, args):
'''
stmt ::= augassign1
stmt ::= augassign2
augassign1 ::= expr expr inplace_op designator
augassign1 ::= expr expr inplace_op ROT_THREE STORE_SUBSCR
augassign1 ::= expr expr inplace_op ROT_TWO STORE_SLICE+0
augassign1 ::= expr expr inplace_op ROT_THREE STORE_SLICE+1
augassign1 ::= expr expr inplace_op ROT_THREE STORE_SLICE+2
augassign1 ::= expr expr inplace_op ROT_FOUR STORE_SLICE+3
augassign2 ::= expr DUP_TOP LOAD_ATTR expr
inplace_op ROT_TWO STORE_ATTR
inplace_op ::= INPLACE_ADD
inplace_op ::= INPLACE_SUBTRACT
inplace_op ::= INPLACE_MULTIPLY
inplace_op ::= INPLACE_DIVIDE
inplace_op ::= INPLACE_TRUE_DIVIDE
inplace_op ::= INPLACE_FLOOR_DIVIDE
inplace_op ::= INPLACE_MODULO
inplace_op ::= INPLACE_POWER
inplace_op ::= INPLACE_LSHIFT
inplace_op ::= INPLACE_RSHIFT
inplace_op ::= INPLACE_AND
inplace_op ::= INPLACE_XOR
inplace_op ::= INPLACE_OR
'''
def p_assign(self, args):
'''
stmt ::= assign
assign ::= expr DUP_TOP designList
assign ::= expr designator
'''
def p_print(self, args):
'''
stmt ::= print_stmt
stmt ::= print_stmt_nl
stmt ::= print_nl_stmt
print_stmt ::= expr PRINT_ITEM
print_nl_stmt ::= PRINT_NEWLINE
print_stmt_nl ::= print_stmt print_nl_stmt
'''
def p_print_to(self, args):
'''
stmt ::= print_to
stmt ::= print_to_nl
stmt ::= print_nl_to
print_to ::= expr print_to_items POP_TOP
print_to_nl ::= expr print_to_items PRINT_NEWLINE_TO
print_nl_to ::= expr PRINT_NEWLINE_TO
print_to_items ::= print_to_items print_to_item
print_to_items ::= print_to_item
print_to_item ::= DUP_TOP expr ROT_TWO PRINT_ITEM_TO
'''
# expr print_to* POP_TOP
# expr { print_to* } PRINT_NEWLINE_TO
def p_import15(self, args):
'''
stmt ::= importstmt
stmt ::= importfrom
importstmt ::= IMPORT_NAME STORE_FAST
importstmt ::= IMPORT_NAME STORE_NAME
importfrom ::= IMPORT_NAME importlist POP_TOP
importlist ::= importlist IMPORT_FROM
importlist ::= IMPORT_FROM
'''
# Python 2.0 - 2.3 imports
def p_import20_23(self, args):
'''
stmt ::= importstmt20
stmt ::= importfrom20
stmt ::= importstar20
importstmt20 ::= LOAD_CONST import_as
importstar20 ::= LOAD_CONST IMPORT_NAME IMPORT_STAR
importfrom20 ::= LOAD_CONST IMPORT_NAME importlist20 POP_TOP
importlist20 ::= importlist20 import_as
importlist20 ::= import_as
import_as ::= IMPORT_NAME designator
import_as ::= IMPORT_NAME LOAD_ATTR designator
import_as ::= IMPORT_FROM designator
'''
def p_grammar(self, args):
'''
stmts ::= stmts stmt
stmts ::= stmt
stmts_opt ::= stmts
stmts_opt ::= passstmt
passstmt ::=
designList ::= designator designator
designList ::= designator DUP_TOP designList
designator ::= STORE_FAST
designator ::= STORE_NAME
designator ::= STORE_GLOBAL
designator ::= STORE_DEREF
designator ::= expr STORE_ATTR
designator ::= expr STORE_SLICE+0
designator ::= expr expr STORE_SLICE+1
designator ::= expr expr STORE_SLICE+2
designator ::= expr expr expr STORE_SLICE+3
designator ::= store_subscr
store_subscr ::= expr expr STORE_SUBSCR
designator ::= unpack
designator ::= unpack_list
stmt ::= classdef
stmt ::= call_stmt
call_stmt ::= expr POP_TOP
stmt ::= return_stmt
return_stmt ::= expr RETURN_VALUE
stmt ::= yield_stmt
yield_stmt ::= expr YIELD_STMT
yield_stmt ::= expr YIELD_VALUE
stmt ::= break_stmt
break_stmt ::= BREAK_LOOP
stmt ::= continue_stmt
continue_stmt ::= JUMP_ABSOLUTE
continue_stmt ::= CONTINUE_LOOP
stmt ::= raise_stmt
raise_stmt ::= exprlist RAISE_VARARGS
raise_stmt ::= nullexprlist RAISE_VARARGS
stmt ::= exec_stmt
exec_stmt ::= expr exprlist DUP_TOP EXEC_STMT
exec_stmt ::= expr exprlist EXEC_STMT
stmt ::= assert
stmt ::= assert2
stmt ::= assert3
stmt ::= assert4
stmt ::= ifstmt
stmt ::= ifelsestmt
stmt ::= whilestmt
stmt ::= while1stmt
stmt ::= while12stmt
stmt ::= whileelsestmt
stmt ::= while1elsestmt
stmt ::= while12elsestmt
stmt ::= forstmt
stmt ::= forelsestmt
stmt ::= trystmt
stmt ::= tryfinallystmt
stmt ::= del_stmt
del_stmt ::= DELETE_FAST
del_stmt ::= DELETE_NAME
del_stmt ::= DELETE_GLOBAL
del_stmt ::= expr DELETE_SLICE+0
del_stmt ::= expr expr DELETE_SLICE+1
del_stmt ::= expr expr DELETE_SLICE+2
del_stmt ::= expr expr expr DELETE_SLICE+3
del_stmt ::= delete_subscr
delete_subscr ::= expr expr DELETE_SUBSCR
del_stmt ::= expr DELETE_ATTR
kwarg ::= LOAD_CONST expr
classdef ::= LOAD_CONST expr mkfunc
CALL_FUNCTION_0 BUILD_CLASS designator
condjmp ::= JUMP_IF_FALSE POP_TOP
condjmp ::= JUMP_IF_TRUE POP_TOP
assert ::= expr JUMP_IF_FALSE POP_TOP
LOGIC_TEST_START expr JUMP_IF_TRUE POP_TOP
LOGIC_TEST_START LOAD_GLOBAL RAISE_VARARGS
LOGIC_TEST_END LOGIC_TEST_END POP_TOP
assert2 ::= expr JUMP_IF_FALSE POP_TOP
LOGIC_TEST_START expr JUMP_IF_TRUE POP_TOP
LOGIC_TEST_START LOAD_GLOBAL expr RAISE_VARARGS
LOGIC_TEST_END LOGIC_TEST_END POP_TOP
assert3 ::= expr JUMP_IF_TRUE POP_TOP
LOGIC_TEST_START LOAD_GLOBAL RAISE_VARARGS
LOGIC_TEST_END POP_TOP
assert4 ::= expr JUMP_IF_TRUE POP_TOP
LOGIC_TEST_START LOAD_GLOBAL expr RAISE_VARARGS
LOGIC_TEST_END POP_TOP
_jump ::= JUMP_ABSOLUTE
_jump ::= JUMP_FORWARD
ifstmt ::= expr condjmp
IF_THEN_START stmts_opt IF_THEN_END
_jump POP_TOP IF_ELSE_START IF_ELSE_END
ifelsestmt ::= expr condjmp
IF_THEN_START stmts_opt IF_THEN_END
_jump POP_TOP IF_ELSE_START stmts IF_ELSE_END
trystmt ::= SETUP_EXCEPT TRY_START stmts_opt
TRY_END POP_BLOCK _jump
except_stmt
try_end ::= END_FINALLY TRY_ELSE_START TRY_ELSE_END
try_end ::= except_else
except_else ::= END_FINALLY TRY_ELSE_START stmts TRY_ELSE_END
except_stmt ::= except_cond except_stmt
except_stmt ::= except_conds try_end
except_stmt ::= except try_end
except_stmt ::= try_end
except_conds ::= except_cond except_conds
except_conds ::=
except_cond ::= except_cond1
except_cond ::= except_cond2
except_cond1 ::= EXCEPT_START DUP_TOP expr COMPARE_OP
JUMP_IF_FALSE
POP_TOP POP_TOP POP_TOP POP_TOP
stmts_opt EXCEPT_END _jump POP_TOP
except_cond2 ::= EXCEPT_START DUP_TOP expr COMPARE_OP
JUMP_IF_FALSE
POP_TOP POP_TOP designator POP_TOP
stmts_opt EXCEPT_END _jump POP_TOP
except ::= EXCEPT_START POP_TOP POP_TOP POP_TOP
stmts_opt EXCEPT_END _jump
tryfinallystmt ::= SETUP_FINALLY stmts_opt
POP_BLOCK LOAD_CONST
stmts_opt END_FINALLY
_while1test ::= _jump JUMP_IF_FALSE POP_TOP
_while1test ::=
whilestmt ::= SETUP_LOOP WHILE_START
expr condjmp
stmts_opt WHILE_END JUMP_ABSOLUTE
WHILE_ELSE_START POP_TOP POP_BLOCK WHILE_ELSE_END
while1stmt ::= SETUP_LOOP _while1test WHILE1_START
stmts_opt WHILE1_END JUMP_ABSOLUTE
WHILE1_ELSE_START POP_TOP POP_BLOCK WHILE1_ELSE_END
while12stmt ::= SETUP_LOOP WHILE1_START
_jump JUMP_IF_FALSE POP_TOP
stmts_opt WHILE1_END JUMP_ABSOLUTE
WHILE1_ELSE_START POP_TOP POP_BLOCK WHILE1_ELSE_END
whileelsestmt ::= SETUP_LOOP WHILE_START
expr condjmp
stmts_opt WHILE_END JUMP_ABSOLUTE
WHILE_ELSE_START POP_TOP POP_BLOCK
stmts WHILE_ELSE_END
while1elsestmt ::= SETUP_LOOP _while1test WHILE1_START
stmts_opt WHILE1_END JUMP_ABSOLUTE
WHILE1_ELSE_START POP_TOP POP_BLOCK
stmts WHILE1_ELSE_END
while12elsestmt ::= SETUP_LOOP WHILE1_START
_jump JUMP_IF_FALSE POP_TOP
stmts_opt WHILE1_END JUMP_ABSOLUTE
WHILE1_ELSE_START POP_TOP POP_BLOCK
stmts WHILE1_ELSE_END
_for ::= GET_ITER FOR_START FOR_ITER
_for ::= LOAD_CONST FOR_LOOP
forstmt ::= SETUP_LOOP expr _for designator
stmts_opt FOR_END JUMP_ABSOLUTE
FOR_ELSE_START POP_BLOCK FOR_ELSE_END
forelsestmt ::= SETUP_LOOP expr _for designator
stmts_opt FOR_END JUMP_ABSOLUTE
FOR_ELSE_START POP_BLOCK stmts FOR_ELSE_END
'''
def p_expr(self, args):
'''
expr ::= load_closure mklambda
expr ::= mklambda
expr ::= SET_LINENO
expr ::= LOAD_FAST
expr ::= LOAD_NAME
expr ::= LOAD_CONST
expr ::= LOAD_GLOBAL
expr ::= LOAD_DEREF
expr ::= LOAD_LOCALS
expr ::= expr LOAD_ATTR
expr ::= binary_expr
expr ::= build_list
binary_expr ::= expr expr binary_op
binary_op ::= BINARY_ADD
binary_op ::= BINARY_SUBTRACT
binary_op ::= BINARY_MULTIPLY
binary_op ::= BINARY_DIVIDE
binary_op ::= BINARY_TRUE_DIVIDE
binary_op ::= BINARY_FLOOR_DIVIDE
binary_op ::= BINARY_MODULO
binary_op ::= BINARY_LSHIFT
binary_op ::= BINARY_RSHIFT
binary_op ::= BINARY_AND
binary_op ::= BINARY_OR
binary_op ::= BINARY_XOR
binary_op ::= BINARY_POWER
expr ::= binary_subscr
binary_subscr ::= expr expr BINARY_SUBSCR
expr ::= expr expr DUP_TOPX_2 BINARY_SUBSCR
expr ::= cmp
expr ::= expr UNARY_POSITIVE
expr ::= expr UNARY_NEGATIVE
expr ::= expr UNARY_CONVERT
expr ::= expr UNARY_INVERT
expr ::= expr UNARY_NOT
expr ::= mapexpr
expr ::= expr SLICE+0
expr ::= expr expr SLICE+1
expr ::= expr expr SLICE+2
expr ::= expr expr expr SLICE+3
expr ::= expr DUP_TOP SLICE+0
expr ::= expr expr DUP_TOPX_2 SLICE+1
expr ::= expr expr DUP_TOPX_2 SLICE+2
expr ::= expr expr expr DUP_TOPX_3 SLICE+3
expr ::= and
expr ::= and2
expr ::= or
or ::= expr JUMP_IF_TRUE POP_TOP LOGIC_TEST_START expr LOGIC_TEST_END
and ::= expr JUMP_IF_FALSE POP_TOP LOGIC_TEST_START expr LOGIC_TEST_END
and2 ::= _jump JUMP_IF_FALSE POP_TOP LOGIC_TEST_START expr LOGIC_TEST_END
cmp ::= cmp_list
cmp ::= compare
compare ::= expr expr COMPARE_OP
cmp_list ::= expr cmp_list1 ROT_TWO IF_ELSE_START POP_TOP
IF_ELSE_END
cmp_list1 ::= expr DUP_TOP ROT_THREE
COMPARE_OP JUMP_IF_FALSE POP_TOP
cmp_list1
cmp_list1 ::= expr DUP_TOP ROT_THREE
COMPARE_OP JUMP_IF_FALSE POP_TOP
IF_THEN_START cmp_list1
cmp_list1 ::= expr DUP_TOP ROT_THREE
COMPARE_OP JUMP_IF_FALSE POP_TOP
IF_THEN_START cmp_list2
cmp_list1 ::= expr DUP_TOP ROT_THREE
COMPARE_OP JUMP_IF_FALSE POP_TOP
cmp_list2
cmp_list2 ::= expr COMPARE_OP IF_THEN_END JUMP_FORWARD
mapexpr ::= BUILD_MAP kvlist
kvlist ::= kvlist kv
kvlist ::= kvlist kv2
kvlist ::=
kv ::= DUP_TOP expr ROT_TWO expr STORE_SUBSCR
kv2 ::= DUP_TOP expr expr ROT_THREE STORE_SUBSCR
exprlist ::= exprlist expr
exprlist ::= expr
nullexprlist ::=
'''
def nonterminal(self, nt, args):
    """
    Build the node for nonterminal *nt* from its children *args*.

    For the left-recursive list nonterminals ('stmts', 'exprlist',
    'kvlist') with more than one child, the second child is appended
    to the first child in place and that first child is returned, so
    repeated items end up in one flat node.  Everything else is
    delegated to GenericASTBuilder.nonterminal().
    """
    flattened = ('stmts', 'exprlist', 'kvlist')
    if not (nt in flattened and len(args) > 1):
        return GenericASTBuilder.nonterminal(self, nt, args)

    # Collect iterated thingies together.
    accumulated = args[0]
    accumulated.append(args[1])
    return accumulated
def __ambiguity(self, children):
    """
    Debugging aid: print the ambiguous *children*, then defer to
    GenericASTBuilder.ambiguity().

    Because of the leading double underscore the name is class-private
    (name-mangled), so this does not override the builder's
    ``ambiguity`` method.
    """
    # only for debugging! to be removed hG/2000-10-15
    # Function-call form: the bare print statement is a syntax error on
    # Python 3, and with one argument this prints the same on Python 2.
    print(children)
    return GenericASTBuilder.ambiguity(self, children)
def resolve(self, list):
    """
    Choose one rule name out of the candidates in *list*.

    When the candidates are exactly a 'funcdef' / 'assign' pair,
    'funcdef' is chosen; every other case is delegated to
    GenericASTBuilder.resolve().
    """
    # NOTE: the parameter name shadows the builtin but is kept, since
    # it is part of the method's signature.
    funcdef_vs_assign = (len(list) == 2
                         and 'funcdef' in list
                         and 'assign' in list)
    if not funcdef_vs_assign:
        return GenericASTBuilder.resolve(self, list)
    return 'funcdef'
def add_custom_rules(self, tokens, customize):
    """
    Special handling for opcodes that take a variable number
    of arguments -- we add a new rule for each:

        build_list  ::= {expr}^n BUILD_LIST_n
        build_list  ::= {expr}^n BUILD_TUPLE_n
        expr        ::= {expr}^n BUILD_SLICE_n
        unpack_list ::= UNPACK_LIST_n {designator}^n
        unpack      ::= UNPACK_TUPLE_n {designator}^n
        unpack      ::= UNPACK_SEQUENCE_n {designator}^n
        mkfunc      ::= {expr}^n LOAD_CONST MAKE_FUNCTION_n
        mklambda    ::= {expr}^n LOAD_LAMBDA MAKE_FUNCTION_n
        mkfunc      ::= {expr}^n load_closure LOAD_CONST MAKE_CLOSURE_n
        mklambda    ::= {expr}^n load_closure LOAD_LAMBDA MAKE_CLOSURE_n
        expr        ::= expr {expr}^na {kwarg}^nk CALL_FUNCTION_n
        expr        ::= expr {expr}^na {kwarg}^nk expr CALL_FUNCTION_VAR_n
        expr        ::= expr {expr}^na {kwarg}^nk expr CALL_FUNCTION_KW_n
        expr        ::= expr {expr}^na {kwarg}^nk expr expr CALL_FUNCTION_VAR_KW_n

    customize maps each specialized token name (such as
    'BUILD_LIST_3') to its integer argument.  tokens is accepted but
    not used.  Keys already recorded in self.customized are skipped,
    so the same rule is never added twice.

    Raises ValueError for a customize key whose opcode is not handled.
    """
    for k, v in customize.items():
        # avoid adding the same rule twice to this parser
        if k in self.customized:
            continue
        self.customized[k] = None

        # Opcode name without the trailing "_<n>" suffix.
        op = k[:k.rfind('_')]
        if op in ('BUILD_LIST', 'BUILD_TUPLE'):
            rule = 'build_list ::= ' + 'expr '*v + k
        elif op == 'BUILD_SLICE':
            rule = 'expr ::= ' + 'expr '*v + k
        elif op in ('UNPACK_TUPLE', 'UNPACK_SEQUENCE'):
            rule = 'unpack ::= ' + k + ' designator'*v
        elif op == 'UNPACK_LIST':
            rule = 'unpack_list ::= ' + k + ' designator'*v
        elif op == 'DUP_TOPX':
            # no need to add a rule
            continue
        elif op == 'MAKE_FUNCTION':
            self.addRule('mklambda ::= %s LOAD_LAMBDA %s' %
                         ('expr '*v, k), nop_func)
            rule = 'mkfunc ::= %s LOAD_CONST %s' % ('expr '*v, k)
        elif op == 'MAKE_CLOSURE':
            self.addRule('mklambda ::= %s load_closure LOAD_LAMBDA %s' %
                         ('expr '*v, k), nop_func)
            rule = 'mkfunc ::= %s load_closure LOAD_CONST %s' % ('expr '*v, k)
        elif op in ('CALL_FUNCTION', 'CALL_FUNCTION_VAR',
                    'CALL_FUNCTION_VAR_KW', 'CALL_FUNCTION_KW'):
            na = (v & 0xff)           # positional parameters
            nk = (v >> 8) & 0xff      # keyword parameters
            # number of apply equiv arguments: the suffix is '' (0),
            # '_VAR' or '_KW' (1), or '_VAR_KW' (2).  Floor division
            # keeps this an int on Python 3 as well.
            nak = (len(op) - len('CALL_FUNCTION')) // 3
            rule = 'expr ::= expr ' + 'expr '*na + 'kwarg '*nk \
                   + 'expr ' * nak + k
        else:
            # String exceptions are not valid; raise a real exception.
            raise ValueError('unknown customize token %s' % k)
        self.addRule(rule, nop_func)
    return
class Python23ParserSingle(Python23Parser, PythonParserSingle):
    # Combines the Python 2.3 grammar with PythonParserSingle; this is
    # the class get_python_parser() picks for version 2.3 when
    # compile_mode is not 'exec'.
    pass
# local variables:
# tab-width: 4

View File

@@ -0,0 +1,23 @@
# Copyright (c) 2016 Rocky Bernstein
"""
spark grammar differences over Python2 for Python 2.6.
"""
from uncompyle6.parser import PythonParserSingle
from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG
from uncompyle6.parsers.parse2 import Python2Parser
class Python26Parser(Python2Parser):
    # Python 2.6 grammar: everything from Python2Parser plus the extra
    # list_iter rule below.

    def __init__(self, debug_parser=PARSER_DEFAULT_DEBUG):
        """Initialize the Python 2 parser, forwarding debug_parser."""
        super(Python26Parser, self).__init__(debug_parser)
        self.customized = {}

    # NOTE(review): the method name looks like a typo for p_list_iter.
    # It is left alone here because a differently named p_ method could
    # collide with one defined in Python2Parser -- check before renaming.
    def p_lis_iter(self, args):
        '''
        list_iter ::= list_if JUMP_BACK
        '''
class Python26ParserSingle(Python26Parser, PythonParserSingle):
    # Derive from Python26Parser (not Python2Parser) so that the
    # 2.6-specific "list_iter ::= list_if JUMP_BACK" rule is also in
    # effect when get_python_parser() selects this class for a
    # non-'exec' compile mode.  This mirrors how Python23ParserSingle
    # derives from Python23Parser.
    pass

View File

@@ -21,6 +21,13 @@ import sys
from uncompyle6 import PYTHON3
from uncompyle6.scanners.tok import Token
# The byte code versions we support
if PYTHON3:
# Need to work out Python 2.3. ord's in PYTHON3
PYTHON_VERSIONS = (2.5, 2.6, 2.7, 3.2, 3.3, 3.4, 3.5)
else:
PYTHON_VERSIONS = (2.3, 2.5, 2.6, 2.7, 3.2, 3.3, 3.4, 3.5)
# FIXME: DRY
if PYTHON3:
intern = sys.intern
@@ -45,30 +52,14 @@ class Code(object):
class Scanner(object):
def __init__(self, version):
def __init__(self, version, show_asm=None):
self.version = version
# FIXME: DRY
if version == 2.7:
from xdis.opcodes import opcode_27
self.opc = opcode_27
elif version == 2.6:
from xdis.opcodes import opcode_26
self.opc = opcode_26
elif version == 2.5:
from xdis.opcodes import opcode_25
self.opc = opcode_25
elif version == 3.2:
from xdis.opcodes import opcode_32
self.opc = opcode_32
elif version == 3.3:
from xdis.opcodes import opcode_33
self.opc = opcode_33
elif version == 3.4:
from xdis.opcodes import opcode_34
self.opc = opcode_34
elif version == 3.5:
from xdis.opcodes import opcode_35
self.opc = opcode_35
self.show_asm = show_asm
if version in PYTHON_VERSIONS:
v_str = "opcode_%s" % (int(version * 10))
exec("from xdis.opcodes import %s" % v_str)
exec("self.opc = %s" % v_str)
else:
raise TypeError("%s is not a Python version I know about" % version)
@@ -281,33 +272,18 @@ class Scanner(object):
target = parent['end']
return target
def get_scanner(version):
def get_scanner(version, show_asm=None):
# Pick up appropriate scanner
# from trepan.api import debug;
# debug(start_opts={'startup-profile': True})
# FIXME: see if we can do better
if version == 2.7:
import uncompyle6.scanners.scanner27 as scan
scanner = scan.Scanner27()
elif version == 2.6:
import uncompyle6.scanners.scanner26 as scan
scanner = scan.Scanner26()
elif version == 2.5:
import uncompyle6.scanners.scanner25 as scan
scanner = scan.Scanner25()
elif version == 3.2:
import uncompyle6.scanners.scanner32 as scan
scanner = scan.Scanner32()
elif version == 3.3:
import uncompyle6.scanners.scanner33 as scan
scanner = scan.Scanner33()
elif version == 3.4:
import uncompyle6.scanners.scanner34 as scan
scanner = scan.Scanner34()
elif version == 3.5:
import uncompyle6.scanners.scanner35 as scan
scanner = scan.Scanner35()
if version in PYTHON_VERSIONS:
v_str = "%s" % (int(version * 10))
exec("import uncompyle6.scanners.scanner%s as scan" % v_str)
if PYTHON3:
import importlib
scan = importlib.import_module("uncompyle6.scanners.scanner%s" % v_str)
if False: print(scan) # Avoid unused scan
else:
exec("import uncompyle6.scanners.scanner%s as scan" % v_str)
scanner = eval("scan.Scanner%s(show_asm=show_asm)" % v_str)
else:
raise RuntimeError("Unsupported Python version %s" % version)
return scanner
@@ -315,9 +291,5 @@ def get_scanner(version):
if __name__ == "__main__":
import inspect, uncompyle6
co = inspect.currentframe().f_code
scanner = get_scanner(uncompyle6.PYTHON_VERSION)
scanner = get_scanner(uncompyle6.PYTHON_VERSION, True)
tokens, customize = scanner.disassemble(co, {})
print('-' * 30)
for t in tokens:
print(t)
pass

View File

@@ -32,12 +32,12 @@ from xdis.bytecode import findlinestarts
import uncompyle6.scanner as scan
class Scanner2(scan.Scanner):
def __init__(self, version):
scan.Scanner.__init__(self, version)
def __init__(self, version, show_asm=None):
scan.Scanner.__init__(self, version, show_asm)
self.pop_jump_if = frozenset([self.opc.PJIF, self.opc.PJIT])
self.jump_forward = frozenset([self.opc.JA, self.opc.JF])
def disassemble(self, co, classname=None, code_objects={}):
def disassemble(self, co, classname=None, code_objects={}, show_asm=None):
"""
Disassemble a Python 2 code object, returning a list of 'Token'.
Various tranformations are made to assist the deparsing grammar.
@@ -49,9 +49,12 @@ class Scanner2(scan.Scanner):
dis.disassemble().
"""
## FIXME: DRY with disassemble_native
# import dis; dis.disassemble(co) # DEBUG
show_asm = self.show_asm if not show_asm else show_asm
if self.show_asm in ('both', 'before'):
from xdis.bytecode import Bytecode
bytecode = Bytecode(co, self.opc)
for instr in bytecode.get_instructions(co):
print(instr._disassemble())
# Container for tokens
tokens = []
@@ -60,6 +63,7 @@ class Scanner2(scan.Scanner):
Token = self.Token # shortcut
n = self.setup_code(co)
self.build_lines_data(co, n)
self.build_prev_op(n)
@@ -201,75 +205,13 @@ class Scanner2(scan.Scanner):
tokens.append(Token(op_name, oparg, pattr, offset, linestart))
else:
tokens.append(Token(replace[offset], oparg, pattr, offset, linestart))
return tokens, customize
def disassemble_native(self, co, classname=None, code_objects={}):
"""
Like disassemble3 but doesn't try to adjust any opcodes.
"""
## FIXME: DRY with disassemble
# Container for tokens
tokens = []
customize = {}
Token = self.Token # shortcut
n = self.setup_code(co)
self.build_lines_data(co, n)
# self.lines contains (block,addrLastInstr)
if classname:
classname = '_' + classname.lstrip('_') + '__'
def unmangle(name):
if name.startswith(classname) and name[-2:] != '__':
return name[len(classname) - 2:]
return name
free = [ unmangle(name) for name in (co.co_cellvars + co.co_freevars) ]
names = [ unmangle(name) for name in co.co_names ]
varnames = [ unmangle(name) for name in co.co_varnames ]
else:
free = co.co_cellvars + co.co_freevars
names = co.co_names
varnames = co.co_varnames
extended_arg = 0
for offset in self.op_range(0, n):
op = self.code[offset]
op_name = self.opc.opname[op]
oparg = None; pattr = None
if op >= self.opc.HAVE_ARGUMENT:
oparg = self.get_argument(offset) + extended_arg
extended_arg = 0
if op == self.opc.EXTENDED_ARG:
extended_arg = oparg * scan.L65536
continue
if op in self.opc.hasconst:
pattr = co.co_consts[oparg]
elif op in self.opc.hasname:
pattr = names[oparg]
elif op in self.opc.hasjrel:
pattr = repr(offset + 3 + oparg)
elif op in self.opc.hasjabs:
pattr = repr(oparg)
elif op in self.opc.haslocal:
pattr = varnames[oparg]
elif op in self.opc.hascompare:
pattr = self.opc.cmp_op[oparg]
elif op in self.opc.hasfree:
pattr = free[oparg]
if offset in self.linestartoffsets:
linestart = self.linestartoffsets[offset]
else:
linestart = None
tokens.append(Token(op_name, oparg, pattr, offset, linestart))
pass
pass
if self.show_asm in ('both', 'after'):
for t in tokens:
print(t)
print()
return tokens, customize
def op_size(self, op):

View File

@@ -0,0 +1,709 @@
# Copyright (c) 2016 by Rocky Bernstein
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
"""
Python 2.3 bytecode scanner
This overlaps Python's 2.3's dis module, but it can be run from Python 3 and
other versions of Python. Also, we save token information for later
use in deparsing.
"""
from uncompyle6.scanners.scanner2 import Scanner2
from uncompyle6.scanner import L65536
class Scanner23(Scanner2):
def __init__(self, show_asm=None):
    """Create a scanner fixed to bytecode version 2.3.

    show_asm is forwarded unchanged to the Scanner2 initializer.
    """
    super(Scanner23, self).__init__(2.3, show_asm)
def disassemble(self, co, code_objects={}, show_asm=None):
    """
    Disassemble a code object, returning a list of 'Token'.

    The main part of this procedure is modelled after
    dis.disassemble().

    co is the code object to scan; its co_code, co_consts, co_names,
    co_varnames, co_cellvars and co_freevars are read.  code_objects
    and show_asm are accepted but not consulted in this method; what
    gets printed is governed by self.show_asm.

    Returns (tokens, customize).  customize maps the name of each
    argument-specialized opcode that was emitted (for example
    'CALL_FUNCTION_1') to its argument.
    """
    if self.show_asm in ('both', 'before'):
        from xdis.bytecode import Bytecode
        bytecode = Bytecode(co, self.opc)
        for instr in bytecode.get_instructions(co):
            print(instr._disassemble())

    # Container for tokens
    tokens = []
    customize = {}
    Token = self.Token # shortcut
    self.code = co.co_code
    structures = self.find_structures(self.code)

    def byte_at(pos):
        # Elements of co_code are 1-character strings on Python 2 but
        # already ints when the code is a Python 3 bytes object.
        b = self.code[pos]
        return b if isinstance(b, int) else ord(b)

    n = len(self.code)
    i = 0
    extended_arg = 0
    free = None
    while i < n:
        offset = i
        if offset in structures:
            # Emit the structure marker tokens recorded for this
            # offset; their offsets are "<offset>_<index>" strings.
            for j, elem in enumerate(structures[offset]):
                tokens.append(Token(elem, offset="%s_%d" % (offset, j)))

        op = byte_at(i)
        opname = self.opc.opname[op]
        i += 1
        oparg = None; pattr = None
        if op >= self.opc.HAVE_ARGUMENT:
            oparg = byte_at(i) + byte_at(i+1) * 256 + extended_arg
            extended_arg = 0
            i += 2
            if op == self.opc.EXTENDED_ARG:
                extended_arg = oparg * L65536
            if op in self.opc.hasconst:
                const = co.co_consts[oparg]
                # We can't use inspect.iscode() because we may be
                # using a different version of Python than the
                # one that this was byte-compiled on. So the code
                # types may mismatch.
                if hasattr(const, 'co_name'):
                    oparg = const
                    if const.co_name == '<lambda>':
                        assert opname == 'LOAD_CONST'
                        opname = 'LOAD_LAMBDA'
                    # verify uses 'pattr' for comparison, since 'attr'
                    # now holds Code(const) and thus can not be used
                    # for comparison (todo: think about changing this)
                    pattr = '<code_object ' + const.co_name + '>'
                else:
                    pattr = const
            elif op in self.opc.hasname:
                pattr = co.co_names[oparg]
            elif op in self.opc.hasjrel:
                # i already points past this instruction.
                pattr = repr(i + oparg)
            elif op in self.opc.hasjabs:
                pattr = repr(oparg)
            elif op in self.opc.haslocal:
                pattr = co.co_varnames[oparg]
            elif op in self.opc.hascompare:
                pattr = self.opc.cmp_op[oparg]
            elif op in self.opc.hasfree:
                if free is None:
                    free = co.co_cellvars + co.co_freevars
                pattr = free[oparg]

        if opname == 'SET_LINENO':
            # Line-number markers produce no token.
            continue
        elif opname in ('BUILD_LIST', 'BUILD_TUPLE', 'BUILD_SLICE',
                        'UNPACK_LIST', 'UNPACK_TUPLE', 'UNPACK_SEQUENCE',
                        'MAKE_FUNCTION', 'CALL_FUNCTION', 'MAKE_CLOSURE',
                        'CALL_FUNCTION_VAR', 'CALL_FUNCTION_KW',
                        'CALL_FUNCTION_VAR_KW', 'DUP_TOPX',
                        ):
            # Fold the argument into the token name and record it in
            # customize under that specialized name.
            opname = '%s_%d' % (opname, oparg)
            customize[opname] = oparg

        tokens.append(Token(opname, oparg, pattr, offset))
        pass

    if self.show_asm:
        for t in tokens:
            print(t)
        print()
    return tokens, customize
def __get_target(self, code, pos, op=None):
    """
    Return the jump target encoded by the instruction at *pos* in *code*.

    The two bytes after the opcode are read as a little-endian 16-bit
    argument.  If the opcode is in self.opc.hasjrel the argument is
    relative to the following instruction, so pos + 3 is added.
    *op* may be given to avoid re-reading the opcode at *pos*.
    """
    def byte_at(idx):
        # 1-character strings (Python 2 code) or ints (Python 3 bytes).
        b = code[idx]
        return b if isinstance(b, int) else ord(b)

    if op is None:
        op = byte_at(pos)
    target = byte_at(pos+1) + byte_at(pos+2) * 256
    if op in self.opc.hasjrel:
        target += pos + 3
    return target
def __first_instr(self, code, start, end, instr, target=None, exact=True):
    """
    Find the first <instr> in the block from start to end.
    <instr> is any python bytecode instruction or a list of opcodes
    If <instr> is an opcode with a target (like a jump), a target
    destination can be specified which must match precisely if exact
    is True, or if exact is False, the instruction which has a target
    closest to <target> will be returned.

    Return index to it or None if not found.
    """
    assert(start>=0 and end<len(code))

    def byte_at(idx):
        # 1-character strings (Python 2 code) or ints (Python 3 bytes).
        b = code[idx]
        return b if isinstance(b, int) else ord(b)

    HAVE_ARGUMENT = self.opc.HAVE_ARGUMENT

    # Accept a single opcode as well as a sequence of opcodes.
    try:
        instr[0]
    except (TypeError, IndexError, KeyError):
        instr = [instr]

    pos = None
    distance = len(code)
    i = start
    while i < end:
        op = byte_at(i)
        if op in instr:
            if target is None:
                return i
            dest = self.__get_target(code, i, op)
            if dest == target:
                return i
            elif not exact:
                # Remember the earliest instruction whose target is
                # strictly closer than any seen so far.
                _distance = abs(target - dest)
                if _distance < distance:
                    distance = _distance
                    pos = i
        # Opcodes below HAVE_ARGUMENT are 1 byte long, the rest 3.
        if op < HAVE_ARGUMENT:
            i += 1
        else:
            i += 3
    return pos
def __last_instr(self, code, start, end, instr, target=None, exact=True):
    """
    Find the last <instr> in the block from start to end.
    <instr> is any python bytecode instruction or a list of opcodes
    If <instr> is an opcode with a target (like a jump), a target
    destination can be specified which must match precisely if exact
    is True, or if exact is False, the instruction which has a target
    closest to <target> will be returned.

    Return index to it or None if not found.
    """
    assert(start>=0 and end<len(code))

    # was `self.self.opc`, which raises AttributeError on every call
    HAVE_ARGUMENT = self.opc.HAVE_ARGUMENT

    # Accept either a single opcode or a sequence of opcodes: anything
    # that cannot be indexed is wrapped in a one-element list.
    try:
        instr[0]
    except (TypeError, IndexError):
        instr = [instr]

    pos = None
    distance = len(code)
    i = start
    while i < end:
        op = ord(code[i])
        if op in instr:
            if target is None:
                pos = i
            else:
                dest = self.__get_target(code, i, op)
                if dest == target:
                    # an exact hit pins distance to 0, so later inexact
                    # candidates can only replace it with another exact one
                    distance = 0
                    pos = i
                elif not exact:
                    # '<=' so that, on a tie, the later instruction wins
                    _distance = abs(target - dest)
                    if _distance <= distance:
                        distance = _distance
                        pos = i
        # instructions below HAVE_ARGUMENT are 1 byte, the rest 3 bytes
        if op < HAVE_ARGUMENT:
            i += 1
        else:
            i += 3
    return pos
def __all_instr(self, code, start, end, instr, target=None):
    """
    Find all <instr> in the block from start to end.
    <instr> is any python bytecode instruction or a list of opcodes
    If <instr> is an opcode with a target (like a jump), a target
    destination can be specified which must match precisely.

    Return a list with indexes to them or [] if none found.
    """
    assert(start>=0 and end<len(code))

    # was `self.self.opc`, which raises AttributeError on every call
    HAVE_ARGUMENT = self.opc.HAVE_ARGUMENT

    # Accept either a single opcode or a sequence of opcodes: anything
    # that cannot be indexed is wrapped in a one-element list.
    try:
        instr[0]
    except (TypeError, IndexError):
        instr = [instr]

    result = []
    i = start
    while i < end:
        op = ord(code[i])
        if op in instr:
            if target is None:
                result.append(i)
            elif target == self.__get_target(code, i, op):
                result.append(i)
        # instructions below HAVE_ARGUMENT are 1 byte, the rest 3 bytes
        if op < HAVE_ARGUMENT:
            i += 1
        else:
            i += 3
    return result
def __next_except_jump(self, code, start, end, target):
    """
    Locate the next jump emitted for an ``except SomeException:`` clause
    of a try...except...else construct.

    Candidate jumps (JUMP_ABSOLUTE / JUMP_FORWARD whose destination is
    <target>) are searched between <start> and <end>.  A candidate is
    accepted when it sits at end-3, or when the argument-less opcodes
    that follow it match one of the recognised patterns below.

    Return the offset of the jump, or None if there is none.
    """
    have_argument = self.opc.HAVE_ARGUMENT
    opmap = self.opc.opmap
    jump_forward = opmap['JUMP_FORWARD']
    jump_absolute = opmap['JUMP_ABSOLUTE']
    end_finally = opmap['END_FINALLY']
    pop_top = opmap['POP_TOP']
    dup_top = opmap['DUP_TOP']
    # SET_LINENO is looked up best-effort; None when the lookup fails
    try:
        set_lineno = opmap['SET_LINENO']
    except:
        set_lineno = None

    jump_ops = [jump_absolute, jump_forward]

    while start < end:
        jmp = self.__first_instr(code, start, end, jump_ops, target)
        if jmp is None:
            return None
        if jmp == end-3:
            return jmp

        # Collect up to four argument-less opcodes (and their offsets)
        # that follow the jump, skipping SET_LINENO instructions.
        seen_ops = [None] * 4
        seen_at = [0] * 4
        filled = 0
        cursor = jmp+3
        while cursor <= end and filled < 4:
            opcode = ord(code[cursor])
            if opcode == set_lineno:
                cursor += 3
                continue
            if opcode >= have_argument:
                break
            seen_ops[filled] = opcode
            seen_at[filled] = cursor
            filled += 1
            cursor += 1

        first, second = seen_ops[0], seen_ops[1]
        ends_handler = (first == pop_top and second == end_finally
                        and seen_at[1] == end)
        next_handler = (first == pop_top and second == dup_top)
        four_pops = (first == second == seen_ops[2] == seen_ops[3] == pop_top)
        if ends_handler or next_handler or four_pops:
            return jmp

        start = jmp + 3
    return None
def __list_comprehension(self, code, pos, op=None):
    """
    Tell whether a list comprehension structure starts at <pos>.

    The pattern recognised is BUILD_LIST with operand 0 immediately
    followed by the opcodes DUP_TOP and LOAD_ATTR.  <op> is the opcode
    at <pos>; it is read from <code> when not supplied.

    Return 0 when <op> is not BUILD_LIST or the bytecode is too short,
    otherwise the boolean result of the pattern comparison.
    """
    opmap = self.opc.opmap
    build_list = opmap['BUILD_LIST']
    dup_top = opmap['DUP_TOP']
    load_attr = opmap['LOAD_ATTR']

    if op is None:
        op = ord(code[pos])
    if op != build_list:
        return 0

    try:
        # operand low byte, operand high byte, then the next two bytes
        low, high, third, fourth = [ord(code[pos + k]) for k in (1, 2, 3, 4)]
    except IndexError:
        return 0

    observed = (op, low + high*256, third, fourth)
    return (observed == (build_list, 0, dup_top, load_attr))
def __ignore_if(self, code, pos):
    """
    Return 1 if the conditional jump at <pos> is to be ignored, else 0.

    A jump is ignored when its offset was recorded in the ignored-ifs
    list, or when it is the test of an exception match: preceded by
    COMPARE_OP with the 'exception match' operand and followed by
    POP_TOP at offsets pos+3 through pos+6.
    """
    opc = self.opc
    pop_top = opc.opmap['POP_TOP']
    compare_op = opc.opmap['COMPARE_OP']
    # NOTE(review): `copmap` is not defined anywhere in view; confirm the
    # opcode module really provides it (cf. `cmp_op` used elsewhere).
    except_match = opc.copmap['exception match']

    ## If that was added by a while loop
    if pos in self.__ignored_ifs:
        return 1

    # Check if we can test only for POP_TOP for this -Dan
    # Maybe need to be done as above (skip SET_LINENO's)
    if ord(code[pos-3]) != compare_op:
        return 0
    if (ord(code[pos-2]) + ord(code[pos-1])*256) != except_match:
        return 0
    for delta in (3, 4, 5, 6):
        if ord(code[pos+delta]) != pop_top:
            return 0
    return 1 ## Exception match
def __fix_parent(self, code, target, parent):
    """
    Fix parent boundaries if needed.

    When <target> is a recorded loop entry located before the parent
    structure, and the parent ends with a JUMP_ABSOLUTE to that same
    entry, the parent's 'end' is pulled back by 3 so that the trailing
    jump is excluded.  <parent> is modified in place; nothing is returned.
    """
    jump_absolute = self.opc.opmap['JUMP_ABSOLUTE']
    first = parent['start']
    last = parent['end']

    ## Map the second start point for 'while 1:' in python 2.3+ to start
    try:
        target = self.__while1[target]
    except:
        pass

    if target >= first:
        return
    if last-first < 3:
        return
    if target not in self.__loops:
        return

    if ord(code[last-3]) == jump_absolute:
        if target == self.__get_target(code, last-3, jump_absolute):
            parent['end'] = last-3
def __restrict_to_parent(self, target, parent):
    """
    Clamp <target> to the boundaries of <parent>.

    Return <target> unchanged when it lies strictly between
    parent['start'] and parent['end']; otherwise return parent['end'].
    """
    inside = parent['start'] < target < parent['end']
    return target if inside else parent['end']
def __detect_structure(self, code, pos, op=None):
"""
Detect structures and their boundaries to fix optimized jumps
in python2.3+

<code> is the raw bytecode, <pos> the offset of the instruction being
examined and <op> its opcode (read from <code> when not supplied).
Depending on the opcode, new entries are appended to self.__structs
(dicts with 'type', 'start' and 'end' keys) and the bookkeeping
containers self.__loops, self.__while1, self.__fixed_jumps and
self.__ignored_ifs are updated.  Nothing is returned.
"""
# TODO: check the struct boundaries more precisely -Dan
SETUP_LOOP = self.opc.opmap['SETUP_LOOP']
FOR_ITER = self.opc.opmap['FOR_ITER']
GET_ITER = self.opc.opmap['GET_ITER']
SETUP_EXCEPT = self.opc.opmap['SETUP_EXCEPT']
JUMP_FORWARD = self.opc.opmap['JUMP_FORWARD']
JUMP_ABSOLUTE = self.opc.opmap['JUMP_ABSOLUTE']
JUMP_IF_FALSE = self.opc.opmap['JUMP_IF_FALSE']
JUMP_IF_TRUE = self.opc.opmap['JUMP_IF_TRUE']
END_FINALLY = self.opc.opmap['END_FINALLY']
POP_TOP = self.opc.opmap['POP_TOP']
POP_BLOCK = self.opc.opmap['POP_BLOCK']
# SET_LINENO is looked up best-effort; None when the lookup fails
try: SET_LINENO = self.opc.opmap['SET_LINENO']
except: SET_LINENO = None
# Ev remove this test and make op a mandatory argument -Dan
if op is None:
op = ord(code[pos])
## Detect parent structure
# Start from the first recorded struct and narrow down to the smallest
# non-LOGIC_TEST struct whose [start, end) range contains pos.
parent = self.__structs[0]
start = parent['start']
end = parent['end']
for s in self.__structs:
if s['type'] == 'LOGIC_TEST':
continue ## logic tests are not structure containers
_start = s['start']
_end = s['end']
if (_start <= pos < _end) and (_start >= start and _end < end):
start = _start
end = _end
parent = s
## We need to know how many new structures were added in this run
origStructCount = len(self.__structs)
# --- Loops: classify as FOR / WHILE / WHILE1 and record the loop body
# --- and its *_ELSE part as two structs.
if op == SETUP_LOOP:
start = pos+3
# this is for python2.2. Maybe we can optimize and not call this for 2.3+ -Dan
while ord(code[start]) == SET_LINENO:
start += 3
start_op = ord(code[start])
while1 = False
if start_op in (JUMP_FORWARD, JUMP_ABSOLUTE):
## This is a while 1 (has a particular structure)
start = self.__get_target(code, start, start_op)
start = self.__restrict_to_parent(start, parent)
self.__while1[pos+3] = start ## map between the 2 start points
while1 = True
if start_op == JUMP_ABSOLUTE and ord(code[pos+6])==JUMP_IF_FALSE:
# special `while 1: pass` in python2.3
self.__fixed_jumps[pos+3] = start
target = self.__get_target(code, pos, op)
end = self.__restrict_to_parent(target, parent)
if target != end:
self.__fixed_jumps[pos] = end
# The last JUMP_ABSOLUTE whose destination is closest to start is
# taken as the jump back to the loop head.
jump_back = self.__last_instr(code, start, end, JUMP_ABSOLUTE,
start, False)
assert(jump_back is not None)
target = self.__get_target(code, jump_back, JUMP_ABSOLUTE)
i = target
while i < jump_back and ord(code[i])==SET_LINENO:
i += 3
if ord(code[i]) in (FOR_ITER, GET_ITER):
loop_type = 'FOR'
else:
lookup = [JUMP_IF_FALSE, JUMP_IF_TRUE]
test = self.__first_instr(code, pos+3, jump_back, lookup, jump_back+3)
if test is None:
# this is a special while 1 structure in python 2.4
while1 = True
else:
#assert(test is not None)
test_target = self.__get_target(code, test)
test_target = self.__restrict_to_parent(test_target, parent)
next = (ord(code[test_target]), ord(code[test_target+1]))
if next == (POP_TOP, POP_BLOCK):
self.__ignored_ifs.append(test)
else:
while1 = True
if while1 == True:
loop_type = 'WHILE1'
else:
loop_type = 'WHILE'
self.__loops.append(target)
self.__structs.append({'type': loop_type,
'start': target,
'end': jump_back})
self.__structs.append({'type': loop_type + '_ELSE',
'start': jump_back+3,
'end': end})
# --- List comprehensions: record the span from the loop head to the
# --- jump back as a LIST_COMPREHENSION struct.
elif self.__list_comprehension(code, pos, op):
get_iter = self.__first_instr(code, pos+7, end, GET_ITER)
for_iter = self.__first_instr(code, get_iter, end, FOR_ITER)
assert(get_iter is not None and for_iter is not None)
start = get_iter+1
target = self.__get_target(code, for_iter, FOR_ITER)
end = self.__restrict_to_parent(target, parent)
jump_back = self.__last_instr(code, start, end, JUMP_ABSOLUTE,
start, False)
assert(jump_back is not None)
target = self.__get_target(code, jump_back, JUMP_ABSOLUTE)
start = self.__restrict_to_parent(target, parent)
self.__structs.append({'type': 'LIST_COMPREHENSION',
'start': start,
'end': jump_back})
# --- try/except: record TRY, TRY_ELSE and one EXCEPT struct per
# --- handler jump found by __next_except_jump.
elif op == SETUP_EXCEPT:
start = pos+3
target = self.__get_target(code, pos, op)
# this should be redundant as it can't be out of boundaries -Dan
# check if it can be removed
end = self.__restrict_to_parent(target, parent)
if target != end:
#print "!!!!found except target != end: %s %s" % (target, end)
self.__fixed_jumps[pos] = end
## Add the try block
self.__structs.append({'type': 'TRY',
'start': start,
'end': end-4})
## Now isolate the except and else blocks
start = end
target = self.__get_target(code, start-3)
#self.__fix_parent(code, target, parent)
try_else_start = target
end = self.__restrict_to_parent(target, parent)
if target != end:
self.__fixed_jumps[start-3] = end
end_finally = self.__last_instr(code, start, end, END_FINALLY)
assert(end_finally is not None)
lookup = [JUMP_ABSOLUTE, JUMP_FORWARD]
jump_end = self.__last_instr(code, start, end_finally, lookup)
assert(jump_end is not None)
target = self.__get_target(code, jump_end)
if target == try_else_start:
end = end_finally+1
else:
end = self.__restrict_to_parent(target, parent)
if target != end:
self.__fixed_jumps[jump_end] = end
## Add the try-else block
self.__structs.append({'type': 'TRY_ELSE',
'start': end_finally+1,
'end': end})
## Add the except blocks
i = start
while i < end_finally:
jmp = self.__next_except_jump(code, i, end_finally, target)
if jmp is None:
break
if i!=start and ord(code[i])==POP_TOP:
pos = i + 1
else:
pos = i
self.__structs.append({'type': 'EXCEPT',
'start': pos,
'end': jmp})
if target != end:
self.__fixed_jumps[jmp] = end
i = jmp+3
# --- JUMP_ABSOLUTE: this branch returns immediately; every statement
# --- after the first `return` below is unreachable and kept only for
# --- reference.
elif op == JUMP_ABSOLUTE:
## detect if we have a 'foo and bar and baz...' structure
## that was optimized (thus the presence of JUMP_ABSOLUTE)
return # no longer needed. just return. remove this elif later -Dan
if pos in self.__fixed_jumps:
return ## Already marked
if parent['end'] - pos < 7:
return
next = (ord(code[pos+3]), ord(code[pos+6]))
if next != (JUMP_IF_FALSE, POP_TOP):
return
end = self.__get_target(code, pos+3)
ifs = self.__all_instr(code, pos, end, JUMP_IF_FALSE, end)
## Test if all JUMP_IF_FALSE we have found belong to the
## structure (may not be needed but it doesn't hurt)
count = len(ifs)
if count < 2:
return
for jif in ifs[1:]:
before = ord(code[jif-3])
after = ord(code[jif+3])
if (before not in (JUMP_FORWARD, JUMP_ABSOLUTE) or
after != POP_TOP):
return
## All tests passed. Perform fixes
self.__ignored_ifs.extend(ifs)
for i in range(count-1):
self.__fixed_jumps[ifs[i]-3] = ifs[i+1]-3
# --- Conditional jumps: record IF_THEN / IF_ELSE structs, or a
# --- LOGIC_TEST struct, unless __ignore_if says to skip this jump.
elif op in (JUMP_IF_FALSE, JUMP_IF_TRUE):
if self.__ignore_if(code, pos):
return
start = pos+4 ## JUMP_IF_FALSE/TRUE + POP_TOP
target = self.__get_target(code, pos, op)
if parent['start'] <= target <= parent['end']:
if ord(code[target-3]) in (JUMP_ABSOLUTE, JUMP_FORWARD):
if_end = self.__get_target(code, target-3)
#self.__fix_parent(code, if_end, parent)
end = self.__restrict_to_parent(if_end, parent)
if ord(code[end-3]) == JUMP_ABSOLUTE:
else_end = self.__get_target(code, end-3)
if if_end == else_end and if_end in self.__loops:
end -= 3 ## skip the continue instruction
if if_end != end:
self.__fixed_jumps[target-3] = end
self.__structs.append({'type': 'IF_THEN',
'start': start,
'end': target-3})
self.__structs.append({'type': 'IF_ELSE',
'start': target+1,
'end': end})
else:
self.__structs.append({'type': 'LOGIC_TEST',
'start': start,
'end': target})
def find_jump_targets(self, code):
    """
    Detect all offsets in a byte code which are jump targets.

    Return a dict mapping each target offset to the number of jumps
    that lead to it.  Only relative jumps, and jumps whose destination
    was recorded in the fixed-jumps map, are counted; absolute jumps
    are not handled yet.

    This procedure is modelled after findlabels() of the opcode
    module, but here for each target the number of jumps is counted.
    For Python 2.3 and later, __detect_structure() is run on every
    instruction first so that optimized jumps can be fixed up.
    """
    have_argument = self.opc.HAVE_ARGUMENT
    relative_jumps = self.opc.hasjrel
    absolute_jumps = self.opc.hasjabs
    needs_fixing = (self.__pyversion >= 2.3)

    code_len = len(code)
    # Reset the per-run bookkeeping used by __detect_structure()
    self.__structs = [{'type': 'root',
                       'start': 0,
                       'end': code_len-1}]
    self.__loops = [] ## All loop entry points
    self.__while1 = {} ## 'while 1:' in python 2.3+ has another start point
    self.__fixed_jumps = {} ## Map fixed jumps to their real destination
    self.__ignored_ifs = [] ## JUMP_IF_XXXX's we should ignore

    counts = {}
    offset = 0
    while offset < code_len:
        opcode = ord(code[offset])

        if needs_fixing:
            ## Determine structures and fix jumps for 2.3+
            self.__detect_structure(code, offset, opcode)

        if opcode < have_argument:
            # one-byte instruction: cannot be a jump
            offset += 1
            continue

        dest = self.__fixed_jumps.get(offset)
        if dest is None:
            operand = ord(code[offset+1]) + ord(code[offset+2]) * 256
            if opcode in relative_jumps:
                dest = offset + 3 + operand
            elif opcode in absolute_jumps:
                # todo: absolute jumps
                pass
        if dest is not None:
            counts[dest] = counts.get(dest, 0) + 1
        offset += 3
    return counts
def find_structures(self, code):
    """
    Detect all structures in a byte code.

    Return a mapping from offset to a list of keywords ("<TYPE>_START"
    and "<TYPE>_END") that should be inserted at that position.  The
    'root' structure never contributes keywords.

    NOTE(review): the scan loop below only steps over the instructions;
    it does not call __detect_structure(), so as written only the root
    structure exists when the keywords are collected.
    """
    have_argument = self.opc.HAVE_ARGUMENT

    code_len = len(code)
    # Reset the per-run bookkeeping
    self.__structs = [{'type': 'root',
                       'start': 0,
                       'end': code_len-1}]
    self.__loops = [] ## All loop entry points
    self.__while1 = {} ## 'while 1:' in python 2.3+ has another start point
    self.__fixed_jumps = {} ## Map fixed jumps to their real destination
    self.__ignored_ifs = [] ## JUMP_IF_XXXX's we should ignore

    # Walk the instruction stream: 3 bytes for opcodes that take an
    # argument, 1 byte otherwise.
    offset = 0
    while offset < code_len:
        offset += 3 if ord(code[offset]) >= have_argument else 1

    keywords = {}
    closing = {}
    for struct in self.__structs:
        kind = struct['type']
        first = struct['start']
        last = struct['end']
        if kind != 'root':
            ## startpoints of the outer structures must come first
            ## endpoints of the inner structures must come first
            keywords.setdefault(first, []).append("%s_START" % kind)
            closing.setdefault(last, []).insert(0, "%s_END" % kind)

    # At any offset, the *_END keywords follow the *_START keywords
    for where, labels in closing.items():
        keywords.setdefault(where, []).extend(labels)

    return keywords
# __scanners = {}
# def getscanner(version):
# if not __scanners.has_key(version):
# __scanners[version] = Scanner(version)
# return __scanners[version]
if __name__ == "__main__":
    # Demo: tokenize this very module's code object, which is only
    # meaningful when running under Python 2.3.
    from uncompyle6 import PYTHON_VERSION
    if PYTHON_VERSION != 2.3:
        print("Need to be Python 2.3 to demo; I am %s." %
              PYTHON_VERSION)
    else:
        import inspect
        co = inspect.currentframe().f_code
        tokens, customize = Scanner23().disassemble(co)
        for t in tokens:
            print(t.format())
# local variables:
# tab-width: 4

View File

@@ -23,8 +23,8 @@ JUMP_OPs = opcode_25.JUMP_OPs
# The history is that 2.7 support is the cleanest,
# then from that we got 2.6 and so on.
class Scanner25(scan.Scanner26):
def __init__(self):
scan2.Scanner2.__init__(self, 2.5)
def __init__(self, show_asm):
scan2.Scanner2.__init__(self, 2.5, show_asm)
self.stmt_opcodes = frozenset([
self.opc.SETUP_LOOP, self.opc.BREAK_LOOP,
self.opc.SETUP_FINALLY, self.opc.END_FINALLY,

View File

@@ -18,8 +18,8 @@ from xdis.opcodes import opcode_26
JUMP_OPs = opcode_26.JUMP_OPs
class Scanner26(scan.Scanner2):
def __init__(self):
super(Scanner26, self).__init__(2.6)
def __init__(self, show_asm=False):
super(Scanner26, self).__init__(2.6, show_asm)
self.stmt_opcodes = frozenset([
self.opc.SETUP_LOOP, self.opc.BREAK_LOOP,
self.opc.SETUP_FINALLY, self.opc.END_FINALLY,
@@ -65,7 +65,7 @@ class Scanner26(scan.Scanner2):
return
def disassemble(self, co, classname=None, code_objects={}):
def disassemble(self, co, classname=None, code_objects={}, show_asm=None):
'''
Disassemble a code object, returning a list of 'Token'.
@@ -73,7 +73,17 @@ class Scanner26(scan.Scanner2):
dis.disassemble().
'''
# import dis; dis.disassemble(co) # DEBUG
show_asm = self.show_asm if not show_asm else show_asm
if self.show_asm in ('both', 'before'):
from xdis.bytecode import Bytecode
bytecode = Bytecode(co, self.opc)
for instr in bytecode.get_instructions(co):
print(instr._disassemble())
# from xdis.bytecode import Bytecode
# bytecode = Bytecode(co, self.opc)
# for instr in bytecode.get_instructions(co):
# print(instr._disassemble())
# Container for tokens
tokens = []
@@ -82,7 +92,8 @@ class Scanner26(scan.Scanner2):
Token = self.Token # shortcut
n = self.setup_code(co)
self.build_lines_data(co, n)
self.build_lines_data(co, n-1)
# linestarts contains block code addresses (addr,block)
self.linestarts = list(findlinestarts(co))
@@ -247,9 +258,10 @@ class Scanner26(scan.Scanner2):
pass
pass
# Debug
# for t in tokens:
# print t
if self.show_asm:
for t in tokens:
print(t)
print()
return tokens, customize
def getOpcodeToDel(self, i):
@@ -509,11 +521,8 @@ class Scanner26(scan.Scanner2):
if op in self.pop_jump_if:
target = self.get_argument(i)
target += i + 3
self.restructJump(i, target)
if self.op_hasArgument(op) and op not in self.opc.hasArgumentExtended:
i += 3
else: i += 1
i += self.op_size(op)
i=0
while i < len(self.code): # we can't use op_range for the moment
@@ -523,9 +532,17 @@ class Scanner26(scan.Scanner2):
if self.code[target] == self.opc.JA:
target = self.get_target(target)
self.restructJump(i, target)
if self.op_hasArgument(op) and op not in self.opc.hasArgumentExtended:
i += 3
else: i += 1
i += self.op_size(op)
i=0
# while i < len(self.code): # we can't use op_range for the moment
# op = self.code[i]
# name = self.opc.opname[op]
# if self.op_hasArgument(op):
# oparg = self.get_argument(i)
# print("%d %s %d" % (i, name, oparg))
# else:
# print("%d %s" % (i, name))
# i += self.op_size(op)
def restructJump(self, pos, newTarget):
if self.code[pos] not in self.opc.hasjabs + self.opc.hasjrel:
@@ -577,8 +594,8 @@ class Scanner26(scan.Scanner2):
if (jump_back and jump_back != self.prev[end]
and code[jump_back + 3] in self.jump_forward):
if (code[self.prev[end]] == self.opc.RETURN_VALUE
or code[self.prev[end]] == self.opc.POP_BLOCK
and code[self.prev[self.prev[end]]] == self.opc.RETURN_VALUE):
or (code[self.prev[end]] == self.opc.POP_BLOCK
and code[self.prev[self.prev[end]]] == self.opc.RETURN_VALUE)):
jump_back = None
if not jump_back: # loop suite ends in return. wtf right?
jump_back = self.last_instr(start, end, self.opc.JA, start, False)
@@ -595,7 +612,7 @@ class Scanner26(scan.Scanner2):
else:
if self.get_target(jump_back) >= next_line_byte:
jump_back = self.last_instr(start, end, self.opc.JA, start, False)
if end > jump_back + 4 and code[end] in (self.opc.JF, self.opc.JA):
if end > jump_back + 4 and code[end] in self.jump_forward:
if code[jump_back + 4] in (self.opc.JA, self.opc.JF):
if self.get_target(jump_back+4) == self.get_target(end):
self.fixed_jumps[pos] = jump_back+4
@@ -694,7 +711,9 @@ class Scanner26(scan.Scanner2):
# is this an if and
if op == self.opc.PJIF:
match = self.rem_or(start, self.next_stmt[pos], self.opc.PJIF, target)
match = self.remove_mid_line_ifs(match)
## We can't remove mid-line ifs because line structures have changed
## from restructBytecode().
## match = self.remove_mid_line_ifs(match)
if match:
if (code[pre[rtarget]] in (self.opc.JF, self.opc.JA)
and pre[rtarget] not in self.stmts
@@ -796,9 +815,7 @@ if __name__ == "__main__":
if PYTHON_VERSION == 2.6:
import inspect
co = inspect.currentframe().f_code
tokens, customize = Scanner26().disassemble(co)
for t in tokens:
print(t.format())
tokens, customize = Scanner26(show_asm=True).disassemble(co)
else:
print("Need to be Python 2.6 to demo; I am %s." %
PYTHON_VERSION)

View File

@@ -17,8 +17,8 @@ from xdis.opcodes import opcode_27
JUMP_OPs = opcode_27.JUMP_OPs
class Scanner27(Scanner2):
def __init__(self):
super(Scanner27, self).__init__(2.7)
def __init__(self, show_asm=False):
super(Scanner27, self).__init__(2.7, show_asm)
# opcodes that start statements
self.stmt_opcodes = frozenset([

View File

@@ -40,10 +40,10 @@ import uncompyle6.scanner as scan
class Scanner3(scan.Scanner):
def __init__(self, version):
super(Scanner3, self).__init__(version)
def __init__(self, version, show_asm=None):
super(Scanner3, self).__init__(version, show_asm)
def disassemble(self, co, classname=None, code_objects={}):
def disassemble(self, co, classname=None, code_objects={}, show_asm=None):
"""
Disassemble a Python 3 code object, returning a list of 'Token'.
Various tranformations are made to assist the deparsing grammar.
@@ -55,7 +55,11 @@ class Scanner3(scan.Scanner):
dis.disassemble().
"""
# import dis; dis.disassemble(co) # DEBUG
show_asm = self.show_asm if not show_asm else show_asm
if self.show_asm in ('both', 'before'):
bytecode = Bytecode(co, self.opc)
for instr in bytecode.get_instructions(co):
print(instr._disassemble())
# Container for tokens
tokens = []
@@ -176,32 +180,6 @@ class Scanner3(scan.Scanner):
pass
return tokens, {}
def disassemble_native(self, co, classname=None, code_objects={}):
"""
Like disassemble3 but doesn't try to adjust any opcodes.
"""
# Container for tokens
tokens = []
self.code = array('B', co.co_code)
bytecode = Bytecode(co, self.opc)
for inst in bytecode:
pattr = inst.argrepr
opname = inst.opname
tokens.append(
Token(
type_ = opname,
attr = inst.argval,
pattr = pattr,
offset = inst.offset,
linestart = inst.starts_line,
)
)
pass
return tokens, {}
def build_lines_data(self, code_obj):
"""
Generate various line-related helper data.

View File

@@ -8,16 +8,15 @@ scanner routine for Python 3.
from __future__ import print_function
import xdis
# bytecode verification, verify(), uses JUMP_OPs from here
# JUMP_OPs = xdis.opcodes.opcode_32.JUMP_OPs
from xdis.opcodes import opcode_32 as opc
JUMP_OPs = map(lambda op: opc.opname[op], opc.hasjrel + opc.hasjabs)
from uncompyle6.scanners.scanner3 import Scanner3
class Scanner32(Scanner3):
def __init__(self):
super(Scanner3, self).__init__(3.2)
def __init__(self, show_asm=None):
super(Scanner3, self).__init__(3.2, show_asm)
return
pass

View File

@@ -8,16 +8,15 @@ scanner routine for Python 3.
from __future__ import print_function
import xdis
# bytecode verification, verify(), uses JUMP_OPs from here
JUMP_OPs = xdis.opcodes.opcode_33.JUMP_OPs
from xdis.opcodes import opcode_33 as opc
JUMP_OPs = map(lambda op: opc.opname[op], opc.hasjrel + opc.hasjabs)
from uncompyle6.scanners.scanner3 import Scanner3
class Scanner33(Scanner3):
def __init__(self):
super(Scanner3, self).__init__(3.3)
def __init__(self, show_asm=False):
super(Scanner3, self).__init__(3.3, show_asm)
return
pass

View File

@@ -17,8 +17,8 @@ JUMP_OPs = map(lambda op: opc.opname[op], opc.hasjrel + opc.hasjabs)
from uncompyle6.scanners.scanner3 import Scanner3
class Scanner34(Scanner3):
def __init__(self):
super(Scanner3, self).__init__(3.4)
def __init__(self, show_asm=None):
super(Scanner3, self).__init__(3.4, show_asm)
return
pass

View File

@@ -8,17 +8,16 @@ scanner routine for Python 3.
from __future__ import print_function
from xdis.opcodes import opcode_35 as opc
from uncompyle6.scanners.scanner3 import Scanner3
# bytecode verification, verify(), uses JUMP_OPs from here
from xdis.opcodes import opcode_35 as opc
JUMP_OPs = map(lambda op: opc.opname[op], opc.hasjrel + opc.hasjabs)
class Scanner35(Scanner3):
def __init__(self):
super(Scanner35, self).__init__(3.5)
def __init__(self, show_asm=None):
super(Scanner35, self).__init__(3.5, show_asm)
return
pass

View File

@@ -345,6 +345,16 @@ TABLE_DIRECT = {
'kv2': ( '%c: %c', 1, 2 ),
'mapexpr': ( '{%[1]C}', (0, maxint, ', ') ),
#######################
# Python 2.3 Additions
#######################
# Import style for 2.0-2.3
'importstmt20': ( '%|import %c\n', 1),
'importstar20': ( '%|from %[1]{pattr} import *\n', ),
'importfrom20': ( '%|from %[1]{pattr} import %c\n', 2 ),
'importlist20': ( '%C', (0, maxint, ', ') ),
#######################
# Python 2.5 Additions
#######################
@@ -526,7 +536,11 @@ class SourceWalker(GenericASTTraversal, object):
self.classes = []
self.pending_newlines = 0
self.hide_internal = True
self.version = version
if 2.0 <= version <= 2.3:
TABLE_DIRECT['tryfinallystmt'] = (
'%|try:\n%+%c%-%|finally:\n%+%c%-\n\n', 1, 4 )
return
f = property(lambda s: s.params['f'],
@@ -1658,7 +1672,6 @@ class SourceWalker(GenericASTTraversal, object):
else:
defparams = node[:args_node.attr]
kw_args, annotate_args = (0, 0)
pos_args = args_node.attr
pass
if self.version > 3.0 and isLambda and iscode(node[-3].attr):