first commit

This commit is contained in:
Mysterie
2012-06-05 10:46:41 +02:00
commit b820316f37
82 changed files with 83963 additions and 0 deletions

11
MANIFEST.in Executable file
View File

@@ -0,0 +1,11 @@
include MANIFEST
include MANIFEST.in
include README
include ANNOUNCE CHANGES TODO
include uncompyle
include test_pythonlib
include test_one
include compile_tests
graft test
graft scripts
global-exclude *~ .*~

10
PKG-INFO Executable file
View File

@@ -0,0 +1,10 @@
Metadata-Version: 1.1
Name: uncompyle
Version: 1.1
Summary: Python byte-code to source-code converter
Home-page: http://github.com/sysfrog/uncompyle
Author: Hartmut Goebel
Author-email: hartmut@oberon.noris.de
License: GPLv3
Description: UNKNOWN
Platform: UNKNOWN

106
README Executable file
View File

@@ -0,0 +1,106 @@
uncompyle2
A Python 2.5, 2.6, 2.7 byte-code decompiler, written in Python 2.7
0.13
2012-6-5
Introduction
------------
'uncompyle2' converts Python byte-code back into equivalent Python
source. It accepts byte-code from Python version 2.5, 2.6 & 2.7. Additionally,
it will only run on Python 2.7.
The generated source is very readable: docstrings, lists, tuples and
hashes get pretty-printed.
'uncompyle2' may also verify the equivalence of the generated source by
compiling it and comparing both byte-codes.
'uncompyle2' is based on John Aycock's generic small languages compiler
'spark' (http://www.csr.uvic.ca/~aycock/python/) and his prior work on
'uncompyle'.
Additional note (3 July 2004, Ben Burton):
The original website from which this software was obtained is no longer
available. It has now become a commercial decompilation service, with
no software available for download.
Any developers seeking to make alterations or enhancements to this code
should therefore consider these debian packages an appropriate starting
point.
Additional note (5 June 2012):
The decompilation of python bytecode 2.5 & 2.6 is based on the work of
Eloi Vanderbeken. bytecode is translated to a pseudo 2.7 python bytecode
and then decompiled.
Features
--------
* decompiles Python byte-code into equivalent Python source
* decompiles byte-code from Python version 2.5, 2.6, 2.7
* pretty-prints docstrings, hashes, lists and tuples
* reads directly from .pyc/.pyo files, bulk-decompile whole
directories
* output may be written to file, a directory or to stdout
* option for including byte-code disassembly into generated source
For a list of changes please refer to the 'CHANGES' file.
Requirements
------------
uncompyle2 requires Python 2.7
Installation
------------
You may either create a RPM and install this, or install directly from
the source distribution.
Creating RPMS:
python setup.py bdist_rpm
If you need to force the Python interpreter, e.g. to python2:
python2 setup.py bdist_rpm --python=python2
Installation from the source distribution:
python setup.py install
To install to a user's home-dir:
python setup.py install --home=<dir>
To install to another prefix (eg. /usr/local)
python setup.py install --prefix=/usr/local
If you need to force the Python interpreter, e.g. to python2:
python2 setup.py install
For more information on 'Installing Python Modules' please refer to
http://www.python.org/doc/current/inst/inst.html
Usage
-----
uncompyle2 -h prints short usage
uncompyle2 --help prints long usage
Known Bugs/Restrictions
-----------------------
There are some known bugs in the 2.5 & 2.6 decompilation support; these will be fixed in an upcoming release.

View File

@@ -0,0 +1,776 @@
# Copyright (c) 1999 John Aycock
# Copyright (c) 2000-2002 by hartmut Goebel <hartmut@goebel.noris.de>
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
#
# See main module for license.
#
__all__ = ['parse', 'AST', 'ParserError', 'Parser']
from spark import GenericASTBuilder
import string, exceptions, sys
from UserList import UserList
from Scanner import Token
class AST(UserList):
    """
    A node of the abstract syntax tree produced by the parser.

    Behaves like a list of its child nodes (via UserList) and carries a
    'type' string, which is interned so node types can be compared
    quickly by identity-based dict/equality machinery.
    """

    def __init__(self, type, kids=()):
        # BUG FIX: the default used to be a shared mutable list ([]).
        # UserList copies its argument so no corruption actually occurred,
        # but an immutable tuple default removes the hazard entirely and
        # is backward compatible (UserList accepts any sequence).
        self.type = intern(type)
        UserList.__init__(self, kids)

    def __getslice__(self, low, high):
        return self.data[low:high]

    def __eq__(self, o):
        # Compare against another AST (type + children) or against a
        # bare node-type string.
        if isinstance(o, AST):
            return self.type == o.type \
                   and UserList.__eq__(self, o)
        else:
            return self.type == o

    def __hash__(self):
        return hash(self.type)

    def __repr__(self, indent=''):
        # Render the node type, then each child on its own line with the
        # child's own rendering indented one step further.
        rv = str(self.type)
        for k in self:
            rv = rv + '\n' + string.replace(str(k), '\n', '\n ')
        return rv
class ParserError(Exception):
    """Raised when the grammar cannot match the token stream."""

    def __init__(self, token, offset):
        self.token = token
        self.offset = offset

    def __str__(self):
        msg = "Syntax error at or near `%r' token at offset %s"
        return msg % (self.token, self.offset)
class Parser(GenericASTBuilder):
    """
    Python byte-code grammar, built on spark's GenericASTBuilder.

    NOTE: the docstrings of the p_* methods below are not documentation —
    spark collects them as the grammar rules at construction time, so
    their text is part of the program's behavior and must not be edited
    casually.  A successful parse produces AST nodes rooted at the
    'stmts' nonterminal.
    """

    def __init__(self):
        GenericASTBuilder.__init__(self, AST, 'stmts')
        # Keys of variable-arity rules added later by parse(); used to
        # avoid registering the same customized rule twice.
        self.customized = {}

    def cleanup(self):
        """
        Remove recursive references to allow garbage
        collector to collect this object.
        """
        # NOTE: 'dict' shadows the builtin here; kept as-is.
        for dict in (self.rule2func, self.rules, self.rule2name):
            for i in dict.keys():
                dict[i] = None
        # Null out every attribute (including bound methods) so the
        # object holds no references at all afterwards.
        for i in dir(self):
            setattr(self, i, None)

    def error(self, token):
        # spark hook: called with the first token that cannot be shifted.
        raise ParserError(token, token.offset)

    def typestring(self, token):
        # spark hook: grammar terminal symbols are the token type strings.
        return token.type

    # Function definitions, decorated functions and closure cells.
    def p_funcdef(self, args):
        '''
        stmt ::= funcdef
        funcdef ::= mkfunc designator
        stmt ::= funcdefdeco
        funcdefdeco ::= mkfuncdeco designator
        mkfuncdeco ::= expr mkfuncdeco CALL_FUNCTION_1
        mkfuncdeco ::= expr mkfuncdeco0 CALL_FUNCTION_1
        mkfuncdeco0 ::= mkfunc
        load_closure ::= load_closure LOAD_CLOSURE
        load_closure ::= LOAD_CLOSURE
        '''

    # List comprehensions ([... for ... if ...]).
    def p_list_comprehension(self, args):
        '''
        expr ::= list_compr
        list_compr ::= BUILD_LIST_0 list_iter
        list_iter ::= list_for
        list_iter ::= list_if
        list_iter ::= list_if_not
        list_iter ::= lc_body
        _come_from ::= COME_FROM
        _come_from ::=
        list_for ::= expr _for designator list_iter JUMP_BACK
        list_if ::= expr jmp_false list_iter
        list_if_not ::= expr jmp_true list_iter
        lc_body ::= expr LIST_APPEND
        '''

    # Set comprehensions and the shared comp_iter machinery.
    def p_setcomp(self, args):
        '''
        expr ::= setcomp
        setcomp ::= LOAD_SETCOMP MAKE_FUNCTION_0 expr GET_ITER CALL_FUNCTION_1
        stmt ::= setcomp_func
        setcomp_func ::= BUILD_SET_0 LOAD_FAST FOR_ITER designator comp_iter
                JUMP_BACK RETURN_VALUE RETURN_LAST
        comp_iter ::= comp_if
        comp_iter ::= comp_ifnot
        comp_iter ::= comp_for
        comp_iter ::= comp_body
        comp_body ::= set_comp_body
        comp_body ::= gen_comp_body
        comp_body ::= dict_comp_body
        set_comp_body ::= expr SET_ADD
        gen_comp_body ::= expr YIELD_VALUE POP_TOP
        dict_comp_body ::= expr expr MAP_ADD
        comp_if ::= expr jmp_false comp_iter
        comp_ifnot ::= expr jmp_true comp_iter
        comp_for ::= expr _for designator comp_iter JUMP_BACK
        '''

    # Generator expressions.
    def p_genexpr(self, args):
        '''
        expr ::= genexpr
        genexpr ::= LOAD_GENEXPR MAKE_FUNCTION_0 expr GET_ITER CALL_FUNCTION_1
        stmt ::= genexpr_func
        genexpr_func ::= LOAD_FAST FOR_ITER designator comp_iter JUMP_BACK
        '''

    # Dict comprehensions.
    def p_dictcomp(self, args):
        '''
        expr ::= dictcomp
        dictcomp ::= LOAD_DICTCOMP MAKE_FUNCTION_0 expr GET_ITER CALL_FUNCTION_1
        stmt ::= dictcomp_func
        dictcomp_func ::= BUILD_MAP LOAD_FAST FOR_ITER designator
                comp_iter JUMP_BACK RETURN_VALUE RETURN_LAST
        '''

    # Augmented assignment (+=, -=, ... on names, subscripts, slices, attrs).
    def p_augmented_assign(self, args):
        '''
        stmt ::= augassign1
        stmt ::= augassign2
        augassign1 ::= expr expr inplace_op designator
        augassign1 ::= expr expr inplace_op ROT_THREE STORE_SUBSCR
        augassign1 ::= expr expr inplace_op ROT_TWO STORE_SLICE+0
        augassign1 ::= expr expr inplace_op ROT_THREE STORE_SLICE+1
        augassign1 ::= expr expr inplace_op ROT_THREE STORE_SLICE+2
        augassign1 ::= expr expr inplace_op ROT_FOUR STORE_SLICE+3
        augassign2 ::= expr DUP_TOP LOAD_ATTR expr
                inplace_op ROT_TWO STORE_ATTR
        inplace_op ::= INPLACE_ADD
        inplace_op ::= INPLACE_SUBTRACT
        inplace_op ::= INPLACE_MULTIPLY
        inplace_op ::= INPLACE_DIVIDE
        inplace_op ::= INPLACE_TRUE_DIVIDE
        inplace_op ::= INPLACE_FLOOR_DIVIDE
        inplace_op ::= INPLACE_MODULO
        inplace_op ::= INPLACE_POWER
        inplace_op ::= INPLACE_LSHIFT
        inplace_op ::= INPLACE_RSHIFT
        inplace_op ::= INPLACE_AND
        inplace_op ::= INPLACE_XOR
        inplace_op ::= INPLACE_OR
        '''

    # Plain, chained (a = b = c) and tuple assignments.
    def p_assign(self, args):
        '''
        stmt ::= assign
        assign ::= expr DUP_TOP designList
        assign ::= expr designator
        stmt ::= assign2
        stmt ::= assign3
        assign2 ::= expr expr ROT_TWO designator designator
        assign3 ::= expr expr expr ROT_THREE ROT_TWO designator designator designator
        '''

    # print statements (the *_CONT forms are synthesized by the scanner).
    def p_print(self, args):
        '''
        stmt ::= print_items_stmt
        stmt ::= print_nl
        stmt ::= print_items_nl_stmt
        print_items_stmt ::= expr PRINT_ITEM print_items_opt
        print_items_nl_stmt ::= expr PRINT_ITEM print_items_opt PRINT_NEWLINE_CONT
        print_items_opt ::= print_items
        print_items_opt ::=
        print_items ::= print_items print_item
        print_items ::= print_item
        print_item ::= expr PRINT_ITEM_CONT
        print_nl ::= PRINT_NEWLINE
        '''

    # print >>stream, ... statements.
    def p_print_to(self, args):
        '''
        stmt ::= print_to
        stmt ::= print_to_nl
        stmt ::= print_nl_to
        print_to ::= expr print_to_items POP_TOP
        print_to_nl ::= expr print_to_items PRINT_NEWLINE_TO
        print_nl_to ::= expr PRINT_NEWLINE_TO
        print_to_items ::= print_to_items print_to_item
        print_to_items ::= print_to_item
        print_to_item ::= DUP_TOP expr ROT_TWO PRINT_ITEM_TO
        '''

    # import / from-import / import * (the *_CONT forms come from the scanner).
    def p_import20(self, args):
        '''
        stmt ::= importstmt
        stmt ::= importfrom
        stmt ::= importstar
        stmt ::= importmultiple
        importlist2 ::= importlist2 import_as
        importlist2 ::= import_as
        import_as ::= IMPORT_NAME designator
        import_as ::= IMPORT_NAME LOAD_ATTR designator
        import_as ::= IMPORT_NAME LOAD_ATTR LOAD_ATTR designator
        import_as ::= IMPORT_NAME LOAD_ATTR LOAD_ATTR LOAD_ATTR designator
        import_as ::= IMPORT_FROM designator
        importstmt ::= LOAD_CONST LOAD_CONST import_as
        importstar ::= LOAD_CONST LOAD_CONST IMPORT_NAME IMPORT_STAR
        importfrom ::= LOAD_CONST LOAD_CONST IMPORT_NAME importlist2 POP_TOP
        importstar ::= LOAD_CONST LOAD_CONST IMPORT_NAME_CONT IMPORT_STAR
        importfrom ::= LOAD_CONST LOAD_CONST IMPORT_NAME_CONT importlist2 POP_TOP
        importmultiple ::= LOAD_CONST LOAD_CONST import_as imports_cont
        imports_cont ::= imports_cont import_cont
        imports_cont ::= import_cont
        import_cont ::= LOAD_CONST LOAD_CONST import_as_cont
        import_as_cont ::= IMPORT_NAME_CONT designator
        import_as_cont ::= IMPORT_NAME_CONT LOAD_ATTR designator
        import_as_cont ::= IMPORT_NAME_CONT LOAD_ATTR LOAD_ATTR designator
        import_as_cont ::= IMPORT_NAME_CONT LOAD_ATTR LOAD_ATTR LOAD_ATTR designator
        import_as_cont ::= IMPORT_FROM designator
        '''

    # Core statement grammar: suites, control flow, exceptions, loops, with.
    def p_grammar(self, args):
        '''
        stmts ::= stmts sstmt
        stmts ::= sstmt
        sstmt ::= stmt
        sstmt ::= ifelsestmtr
        sstmt ::= return_stmt RETURN_LAST
        stmts_opt ::= stmts
        stmts_opt ::= passstmt
        passstmt ::=
        _stmts ::= _stmts stmt
        _stmts ::= stmt
        c_stmts ::= _stmts
        c_stmts ::= _stmts lastc_stmt
        c_stmts ::= lastc_stmt
        c_stmts ::= continue_stmts
        lastc_stmt ::= iflaststmt
        lastc_stmt ::= whileelselaststmt
        lastc_stmt ::= forelselaststmt
        lastc_stmt ::= ifelsestmtr
        lastc_stmt ::= ifelsestmtc
        lastc_stmt ::= tryelsestmtc
        c_stmts_opt ::= c_stmts
        c_stmts_opt ::= passstmt
        l_stmts ::= _stmts
        l_stmts ::= return_stmts
        l_stmts ::= continue_stmts
        l_stmts ::= _stmts lastl_stmt
        l_stmts ::= lastl_stmt
        lastl_stmt ::= iflaststmtl
        lastl_stmt ::= ifelsestmtl
        lastl_stmt ::= forelselaststmtl
        lastl_stmt ::= tryelsestmtl
        l_stmts_opt ::= l_stmts
        l_stmts_opt ::= passstmt
        suite_stmts ::= _stmts
        suite_stmts ::= return_stmts
        suite_stmts ::= continue_stmts
        suite_stmts_opt ::= suite_stmts
        suite_stmts_opt ::= passstmt
        else_suite ::= suite_stmts
        else_suitel ::= l_stmts
        else_suitec ::= c_stmts
        else_suitec ::= return_stmts
        designList ::= designator designator
        designList ::= designator DUP_TOP designList
        designator ::= STORE_FAST
        designator ::= STORE_NAME
        designator ::= STORE_GLOBAL
        designator ::= STORE_DEREF
        designator ::= expr STORE_ATTR
        designator ::= expr STORE_SLICE+0
        designator ::= expr expr STORE_SLICE+1
        designator ::= expr expr STORE_SLICE+2
        designator ::= expr expr expr STORE_SLICE+3
        designator ::= store_subscr
        store_subscr ::= expr expr STORE_SUBSCR
        designator ::= unpack
        designator ::= unpack_list
        stmt ::= classdef
        stmt ::= call_stmt
        call_stmt ::= expr POP_TOP
        stmt ::= return_stmt
        return_stmt ::= expr RETURN_VALUE
        return_stmts ::= return_stmt
        return_stmts ::= _stmts return_stmt
        return_if_stmts ::= return_if_stmt
        return_if_stmts ::= _stmts return_if_stmt
        return_if_stmt ::= expr RETURN_END_IF
        stmt ::= break_stmt
        break_stmt ::= BREAK_LOOP
        stmt ::= continue_stmt
        continue_stmt ::= CONTINUE
        continue_stmt ::= CONTINUE_LOOP
        continue_stmts ::= _stmts lastl_stmt continue_stmt
        continue_stmts ::= lastl_stmt continue_stmt
        continue_stmts ::= continue_stmt
        stmt ::= raise_stmt
        raise_stmt ::= exprlist RAISE_VARARGS
        raise_stmt ::= nullexprlist RAISE_VARARGS
        stmt ::= exec_stmt
        exec_stmt ::= expr exprlist DUP_TOP EXEC_STMT
        exec_stmt ::= expr exprlist EXEC_STMT
        stmt ::= assert
        stmt ::= assert2
        stmt ::= ifstmt
        stmt ::= ifelsestmt
        stmt ::= whilestmt
        stmt ::= whilenotstmt
        stmt ::= while1stmt
        stmt ::= whileelsestmt
        stmt ::= while1elsestmt
        stmt ::= forstmt
        stmt ::= forelsestmt
        stmt ::= trystmt
        stmt ::= tryelsestmt
        stmt ::= tryfinallystmt
        stmt ::= withstmt
        stmt ::= withasstmt
        stmt ::= del_stmt
        del_stmt ::= DELETE_FAST
        del_stmt ::= DELETE_NAME
        del_stmt ::= DELETE_GLOBAL
        del_stmt ::= expr DELETE_SLICE+0
        del_stmt ::= expr expr DELETE_SLICE+1
        del_stmt ::= expr expr DELETE_SLICE+2
        del_stmt ::= expr expr expr DELETE_SLICE+3
        del_stmt ::= delete_subscr
        delete_subscr ::= expr expr DELETE_SUBSCR
        del_stmt ::= expr DELETE_ATTR
        kwarg ::= LOAD_CONST expr
        classdef ::= LOAD_CONST expr mkfunc
                CALL_FUNCTION_0 BUILD_CLASS designator
        stmt ::= classdefdeco
        classdefdeco ::= classdefdeco1 designator
        classdefdeco1 ::= expr classdefdeco1 CALL_FUNCTION_1
        classdefdeco1 ::= expr classdefdeco2 CALL_FUNCTION_1
        classdefdeco2 ::= LOAD_CONST expr mkfunc CALL_FUNCTION_0 BUILD_CLASS
        _jump ::= JUMP_ABSOLUTE
        _jump ::= JUMP_FORWARD
        _jump ::= JUMP_BACK
        jmp_false ::= POP_JUMP_IF_FALSE
        jmp_false ::= JUMP_IF_FALSE
        jmp_true ::= POP_JUMP_IF_TRUE
        jmp_true ::= JUMP_IF_TRUE
        multi_come_from ::= multi_come_from COME_FROM
        multi_come_from ::=
        assert_end ::= multi_come_from POP_TOP
        assert_end ::=
        assert ::= assert_expr jmp_true
                LOAD_ASSERT RAISE_VARARGS assert_end
        assert2 ::= assert_expr jmp_true
                LOAD_ASSERT expr RAISE_VARARGS assert_end
        assert ::= assert_expr jmp_true
                LOAD_GLOBAL RAISE_VARARGS assert_end
        assert2 ::= assert_expr jmp_true
                LOAD_GLOBAL expr RAISE_VARARGS assert_end
        assert_expr ::= assert_expr_or
        assert_expr ::= assert_expr_and
        assert_expr ::= expr
        assert_expr_or ::= assert_expr jmp_true expr
        assert_expr_and ::= assert_expr jmp_false expr
        ifstmt ::= testexpr _ifstmts_jump
        testexpr ::= testfalse
        testexpr ::= testtrue
        testfalse ::= expr jmp_false
        testtrue ::= expr jmp_true
        _ifstmts_jump ::= return_if_stmts
        _ifstmts_jump ::= c_stmts_opt JUMP_FORWARD COME_FROM
        iflaststmt ::= testexpr c_stmts_opt JUMP_ABSOLUTE
        iflaststmtl ::= testexpr c_stmts_opt JUMP_BACK
        ifelsestmt ::= testexpr c_stmts_opt JUMP_FORWARD else_suite COME_FROM
        ifelsestmtc ::= testexpr c_stmts_opt JUMP_ABSOLUTE else_suitec
        ifelsestmtr ::= testexpr return_if_stmts return_stmts
        ifelsestmtl ::= testexpr c_stmts_opt JUMP_BACK else_suitel
        trystmt ::= SETUP_EXCEPT suite_stmts_opt POP_BLOCK
                try_middle COME_FROM
        tryelsestmt ::= SETUP_EXCEPT suite_stmts_opt POP_BLOCK
                try_middle else_suite COME_FROM
        tryelsestmtc ::= SETUP_EXCEPT suite_stmts_opt POP_BLOCK
                try_middle else_suitec COME_FROM
        tryelsestmtl ::= SETUP_EXCEPT suite_stmts_opt POP_BLOCK
                try_middle else_suitel COME_FROM
        try_middle ::= jmp_abs COME_FROM except_stmts
                END_FINALLY
        try_middle ::= JUMP_FORWARD COME_FROM except_stmts
                END_FINALLY COME_FROM
        except_stmts ::= except_stmts except_stmt
        except_stmts ::= except_stmt
        except_stmt ::= except_cond1 except_suite
        except_stmt ::= except_cond2 except_suite
        except_stmt ::= except
        except_suite ::= c_stmts_opt JUMP_FORWARD
        except_suite ::= c_stmts_opt jmp_abs
        except_suite ::= return_stmts
        except_cond1 ::= DUP_TOP expr COMPARE_OP
                jmp_false POP_TOP POP_TOP POP_TOP
        except_cond2 ::= DUP_TOP expr COMPARE_OP
                jmp_false POP_TOP designator POP_TOP
        except ::= POP_TOP POP_TOP POP_TOP c_stmts_opt JUMP_FORWARD
        except ::= POP_TOP POP_TOP POP_TOP c_stmts_opt jmp_abs
        except ::= POP_TOP POP_TOP POP_TOP return_stmts
        jmp_abs ::= JUMP_ABSOLUTE
        jmp_abs ::= JUMP_BACK
        tryfinallystmt ::= SETUP_FINALLY suite_stmts
                POP_BLOCK LOAD_CONST
                COME_FROM suite_stmts_opt END_FINALLY
        withstmt ::= expr SETUP_WITH POP_TOP suite_stmts_opt
                POP_BLOCK LOAD_CONST COME_FROM
                WITH_CLEANUP END_FINALLY
        withasstmt ::= expr SETUP_WITH designator suite_stmts_opt
                POP_BLOCK LOAD_CONST COME_FROM
                WITH_CLEANUP END_FINALLY
        whilestmt ::= SETUP_LOOP
                testexpr
                l_stmts_opt JUMP_BACK
                POP_BLOCK COME_FROM
        whilestmt ::= SETUP_LOOP
                testexpr
                return_stmts
                POP_BLOCK COME_FROM
        while1stmt ::= SETUP_LOOP l_stmts JUMP_BACK COME_FROM
        while1stmt ::= SETUP_LOOP return_stmts COME_FROM
        whileelsestmt ::= SETUP_LOOP testexpr
                l_stmts_opt JUMP_BACK
                POP_BLOCK
                else_suite COME_FROM
        whileelselaststmt ::= SETUP_LOOP testexpr
                l_stmts_opt JUMP_BACK
                POP_BLOCK
                else_suitec COME_FROM
        _for ::= GET_ITER FOR_ITER
        _for ::= LOAD_CONST FOR_LOOP
        for_block ::= l_stmts_opt JUMP_BACK
        for_block ::= return_stmts _come_from
        forstmt ::= SETUP_LOOP expr _for designator
                for_block POP_BLOCK COME_FROM
        forelsestmt ::= SETUP_LOOP expr _for designator
                for_block POP_BLOCK else_suite COME_FROM
        forelselaststmt ::= SETUP_LOOP expr _for designator
                for_block POP_BLOCK else_suitec COME_FROM
        forelselaststmtl ::= SETUP_LOOP expr _for designator
                for_block POP_BLOCK else_suitel COME_FROM
        '''

    # Expression grammar: loads, operators, slices, comparisons, dict/map.
    def p_expr(self, args):
        '''
        expr ::= _mklambda
        expr ::= SET_LINENO
        expr ::= LOAD_FAST
        expr ::= LOAD_NAME
        expr ::= LOAD_CONST
        expr ::= LOAD_ASSERT
        expr ::= LOAD_GLOBAL
        expr ::= LOAD_DEREF
        expr ::= LOAD_LOCALS
        expr ::= load_attr
        expr ::= binary_expr
        expr ::= binary_expr_na
        expr ::= build_list
        expr ::= cmp
        expr ::= mapexpr
        expr ::= and
        expr ::= and2
        expr ::= or
        expr ::= unary_expr
        expr ::= call_function
        expr ::= unary_not
        expr ::= unary_convert
        expr ::= binary_subscr
        expr ::= binary_subscr2
        expr ::= load_attr
        expr ::= get_iter
        expr ::= slice0
        expr ::= slice1
        expr ::= slice2
        expr ::= slice3
        expr ::= buildslice2
        expr ::= buildslice3
        expr ::= yield
        binary_expr ::= expr expr binary_op
        binary_op ::= BINARY_ADD
        binary_op ::= BINARY_MULTIPLY
        binary_op ::= BINARY_AND
        binary_op ::= BINARY_OR
        binary_op ::= BINARY_XOR
        binary_op ::= BINARY_SUBTRACT
        binary_op ::= BINARY_DIVIDE
        binary_op ::= BINARY_TRUE_DIVIDE
        binary_op ::= BINARY_FLOOR_DIVIDE
        binary_op ::= BINARY_MODULO
        binary_op ::= BINARY_LSHIFT
        binary_op ::= BINARY_RSHIFT
        binary_op ::= BINARY_POWER
        unary_expr ::= expr unary_op
        unary_op ::= UNARY_POSITIVE
        unary_op ::= UNARY_NEGATIVE
        unary_op ::= UNARY_INVERT
        unary_not ::= expr UNARY_NOT
        unary_convert ::= expr UNARY_CONVERT
        binary_subscr ::= expr expr BINARY_SUBSCR
        binary_subscr2 ::= expr expr DUP_TOPX_2 BINARY_SUBSCR
        load_attr ::= expr LOAD_ATTR
        get_iter ::= expr GET_ITER
        slice0 ::= expr SLICE+0
        slice0 ::= expr DUP_TOP SLICE+0
        slice1 ::= expr expr SLICE+1
        slice1 ::= expr expr DUP_TOPX_2 SLICE+1
        slice2 ::= expr expr SLICE+2
        slice2 ::= expr expr DUP_TOPX_2 SLICE+2
        slice3 ::= expr expr expr SLICE+3
        slice3 ::= expr expr expr DUP_TOPX_3 SLICE+3
        buildslice3 ::= expr expr expr BUILD_SLICE_3
        buildslice2 ::= expr expr BUILD_SLICE_2
        yield ::= expr YIELD_VALUE
        _mklambda ::= load_closure mklambda
        _mklambda ::= mklambda
        or ::= expr jmp_true expr _come_from
        or ::= expr JUMP_IF_TRUE_OR_POP expr COME_FROM
        and ::= expr jmp_false expr _come_from
        and ::= expr JUMP_IF_FALSE_OR_POP expr COME_FROM
        and2 ::= _jump jmp_false COME_FROM expr COME_FROM
        expr ::= conditional
        conditional ::= expr jmp_false expr JUMP_FORWARD expr COME_FROM
        conditional ::= expr jmp_false expr JUMP_ABSOLUTE expr
        expr ::= conditionalnot
        conditionalnot ::= expr jmp_true expr _jump expr COME_FROM
        stmt ::= return_lambda
        stmt ::= conditional_lambda
        stmt ::= conditional_lambda2
        return_lambda ::= expr RETURN_VALUE LAMBDA_MARKER
        conditional_lambda ::= expr jmp_false return_if_stmt return_stmt LAMBDA_MARKER
        cmp ::= cmp_list
        cmp ::= compare
        compare ::= expr expr COMPARE_OP
        cmp_list ::= expr cmp_list1 ROT_TWO POP_TOP
                _come_from
        cmp_list1 ::= expr DUP_TOP ROT_THREE
                COMPARE_OP JUMP_IF_FALSE_OR_POP
                cmp_list1 COME_FROM
        cmp_list1 ::= expr DUP_TOP ROT_THREE
                COMPARE_OP jmp_false
                cmp_list1 _come_from
        cmp_list1 ::= expr DUP_TOP ROT_THREE
                COMPARE_OP JUMP_IF_FALSE_OR_POP
                cmp_list2 COME_FROM
        cmp_list1 ::= expr DUP_TOP ROT_THREE
                COMPARE_OP jmp_false
                cmp_list2 _come_from
        cmp_list2 ::= expr COMPARE_OP JUMP_FORWARD
        cmp_list2 ::= expr COMPARE_OP RETURN_VALUE
        mapexpr ::= BUILD_MAP kvlist
        kvlist ::= kvlist kv
        kvlist ::= kvlist kv2
        kvlist ::= kvlist kv3
        kvlist ::=
        kv ::= DUP_TOP expr ROT_TWO expr STORE_SUBSCR
        kv2 ::= DUP_TOP expr expr ROT_THREE STORE_SUBSCR
        kv3 ::= expr expr STORE_MAP
        exprlist ::= exprlist expr
        exprlist ::= expr
        nullexprlist ::=
        '''

    def nonterminal(self, nt, args):
        # For left-recursive "list" nonterminals, flatten the parse by
        # appending to the existing node instead of nesting a new one.
        collect = ('stmts', 'exprlist', 'kvlist', '_stmts', 'print_items')
        if nt in collect and len(args) > 1:
            #
            # Collect iterated thingies together.
            #
            rv = args[0]
            rv.append(args[1])
        else:
            rv = GenericASTBuilder.nonterminal(self, nt, args)
        return rv

    def __ambiguity(self, children):
        # only for debugging! to be removed hG/2000-10-15
        print children
        return GenericASTBuilder.ambiguity(self, children)

    def resolve(self, list):
        # spark hook: pick one rule when the grammar is ambiguous.
        if len(list) == 2 and 'funcdef' in list and 'assign' in list:
            return 'funcdef'
        if 'grammar' in list and 'expr' in list:
            return 'expr'
        #print >> sys.stderr, 'resolve', str(list)
        return GenericASTBuilder.resolve(self, list)
# Shared no-op rule callback and the single module-level parser instance;
# customized rules accumulate on it across calls to parse().
nop = lambda self, args: None

p = Parser()


def parse(tokens, customize):
    """
    Parse a token stream into an AST.

    'customize' maps opcode names that embed a variable argument count
    (e.g. CALL_FUNCTION_3) to that count.  For each such opcode a
    matching grammar rule is registered on the shared parser before
    parsing:

        expr ::= {expr}^n BUILD_LIST_n
        expr ::= {expr}^n BUILD_TUPLE_n
        unpack_list ::= UNPACK_LIST {expr}^n
        unpack ::= UNPACK_TUPLE {expr}^n
        unpack ::= UNPACK_SEQEUENE {expr}^n
        mkfunc ::= {expr}^n LOAD_CONST MAKE_FUNCTION_n
        mkfunc ::= {expr}^n load_closure LOAD_CONST MAKE_FUNCTION_n
        expr ::= expr {expr}^n CALL_FUNCTION_n
        expr ::= expr {expr}^n CALL_FUNCTION_VAR_n POP_TOP
        expr ::= expr {expr}^n CALL_FUNCTION_VAR_KW_n POP_TOP
        expr ::= expr {expr}^n CALL_FUNCTION_KW_n POP_TOP
    """
    global p
    for opkey, arity in customize.items():
        # avoid adding the same rule twice to this parser
        if opkey in p.customized:
            continue
        p.customized[opkey] = None
        # strip the trailing _<count> to recover the base opcode name
        op = opkey[:opkey.rfind('_')]
        if op in ('BUILD_LIST', 'BUILD_TUPLE', 'BUILD_SET'):
            rule = 'build_list ::= ' + 'expr '*arity + opkey
        elif op in ('UNPACK_TUPLE', 'UNPACK_SEQUENCE'):
            rule = 'unpack ::= ' + opkey + ' designator'*arity
        elif op == 'UNPACK_LIST':
            rule = 'unpack_list ::= ' + opkey + ' designator'*arity
        elif op == 'DUP_TOPX':
            # no need to add a rule
            continue
        elif op == 'MAKE_FUNCTION':
            p.addRule('mklambda ::= %s LOAD_LAMBDA %s' %
                      ('expr '*arity, opkey), nop)
            rule = 'mkfunc ::= %s LOAD_CONST %s' % ('expr '*arity, opkey)
        elif op == 'MAKE_CLOSURE':
            p.addRule('mklambda ::= %s load_closure LOAD_LAMBDA %s' %
                      ('expr '*arity, opkey), nop)
            p.addRule('genexpr ::= %s load_closure LOAD_GENEXPR %s expr GET_ITER CALL_FUNCTION_1' %
                      ('expr '*arity, opkey), nop)
            p.addRule('setcomp ::= %s load_closure LOAD_SETCOMP %s expr GET_ITER CALL_FUNCTION_1' %
                      ('expr '*arity, opkey), nop)
            p.addRule('dictcomp ::= %s load_closure LOAD_DICTCOMP %s expr GET_ITER CALL_FUNCTION_1' %
                      ('expr '*arity, opkey), nop)
            rule = 'mkfunc ::= %s load_closure LOAD_CONST %s' % ('expr '*arity, opkey)
        elif op in ('CALL_FUNCTION', 'CALL_FUNCTION_VAR',
                    'CALL_FUNCTION_VAR_KW', 'CALL_FUNCTION_KW'):
            na = (arity & 0xff)        # positional parameters
            nk = (arity >> 8) & 0xff   # keyword parameters
            # number of apply equiv arguments (*args / **kwargs):
            nak = ( len(op)-len('CALL_FUNCTION') ) / 3
            rule = 'call_function ::= expr ' + 'expr '*na + 'kwarg '*nk \
                   + 'expr ' * nak + opkey
        else:
            raise Exception('unknown customize token %s' % opkey)
        p.addRule(rule, nop)
    ast = p.parse(tokens)
    return ast

View File

@@ -0,0 +1,849 @@
# Copyright (c) 1999 John Aycock
# Copyright (c) 2000-2002 by hartmut Goebel <hartmut@goebel.noris.de>
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
#
# See main module for license.
#
__all__ = ['Token', 'Scanner', 'getscanner']
import types
import disas as dis
from collections import namedtuple
from array import array
from operator import itemgetter
class Token:
    """
    One disassembled byte-code instruction.

    A Token corresponds to what a single line of dis.dis() output
    describes: an (interned) opcode name plus its raw argument, its
    printable argument, its byte offset, and whether a source line
    starts here.
    """

    def __init__(self, type_, attr=None, pattr=None, offset=-1, linestart=False):
        self.type = intern(type_)
        self.attr = attr
        self.pattr = pattr
        self.offset = offset
        self.linestart = linestart

    def __cmp__(self, o):
        if not isinstance(o, Token):
            # comparing against a bare type string
            return cmp(self.type, o)
        # both are tokens: compare type and pattr
        return cmp(self.type, o.type) or cmp(self.pattr, o.pattr)

    def __repr__(self):
        return str(self.type)

    def __str__(self):
        text = '%s\t%-17s %r' % (self.offset, self.type, self.pattr)
        if self.linestart:
            return '\n' + text
        return text

    def __hash__(self):
        return hash(self.type)

    def __getitem__(self, i):
        # Tokens are leaves, not sequences.
        raise IndexError
class Code:
    """
    Class for representing code-objects.

    Mirrors every co_* attribute of the wrapped code object, and
    additionally stores the disassembly in '_tokens' and the
    variable-arity opcode info in '_customize'.
    """

    def __init__(self, co, scanner, classname=None):
        # Copy all co_* attributes of the real code object onto self.
        for attr_name in dir(co):
            if attr_name.startswith('co_'):
                setattr(self, attr_name, getattr(co, attr_name))
        self._tokens, self._customize = scanner.disassemble(co, classname)
class Scanner:
    def __init__(self, version):
        # Target bytecode version to scan (2.5 / 2.6 / 2.7), which need
        # not be the running interpreter's version.
        self.version = version
        self.resetTokenClass()
        # Configure the bundled disassembler for that version, then pull
        # its opcode tables into this module's globals so opcodes can be
        # referenced below as bare names (e.g. JUMP_ABSOLUTE).
        # NOTE(review): this mutates module globals at instance-creation
        # time, so two Scanners with different versions cannot coexist.
        dis.setVersion(version)
        globals().update({'HAVE_ARGUMENT': dis.HAVE_ARGUMENT})
        # Opcode names containing '+' (e.g. SLICE+0) are not valid
        # identifiers; expose them with '_' instead.
        globals().update({k.replace('+','_'):v for (k,v) in dis.opmap.items()})
        # Short aliases for frequently tested jump opcodes.
        globals().update({'PJIF': dis.opmap['POP_JUMP_IF_FALSE']})
        globals().update({'PJIT': dis.opmap['POP_JUMP_IF_TRUE']})
        globals().update({'JA': dis.opmap['JUMP_ABSOLUTE']})
        globals().update({'JF': dis.opmap['JUMP_FORWARD']})
        self.JUMP_OPs = map(lambda op: dis.opname[op],
                            dis.hasjrel + dis.hasjabs)
def setShowAsm(self, showasm, out=None):
self.showasm = showasm
self.out = out
    def setTokenClass(self, tokenClass):
        # Install the class used to build tokens during disassembly.
        # Only old-style (classic) Python 2 classes are expected; the
        # assert guards against passing an instance by mistake.
        assert type(tokenClass) == types.ClassType
        self.Token = tokenClass
    def resetTokenClass(self):
        # Restore the default Token class defined in this module.
        self.setTokenClass(Token)
    def disassemble(self, co, classname=None):
        """
        Disassemble a code object, returning a list of 'Token'.

        The main part of this procedure is modelled after
        dis.disassemble().  Beyond plain disassembly it synthesizes
        pseudo-opcodes the grammar relies on (COME_FROM, *_CONT
        variants, JUMP_BACK/CONTINUE, LOAD_ASSERT, RETURN_END_IF) and
        collects variable-arity opcodes into 'customize'.
        """
        rv = []
        customize = {}
        Token = self.Token # shortcut
        self.code = code = array('B', co.co_code)
        n = len(code)
        self.prev = [0]
        # self.prev maps each byte position to the offset of the
        # instruction that precedes it (argument bytes get the same
        # predecessor as their opcode byte).
        for i in self.op_range(0, n):
            c = code[i]
            op = code[i]
            self.prev.append(i)
            if op >= HAVE_ARGUMENT:
                self.prev.append(i)
                self.prev.append(i)
        self.lines = []
        linetuple = namedtuple('linetuple', ['l_no', 'next'])
        j = 0
        # linestarts contains (byte offset, source line number) pairs;
        # expand them so self.lines[offset] gives the current line and
        # the offset where the next line begins.
        linestarts = list(dis.findlinestarts(co))
        linestartoffsets = {a for (a, _) in linestarts}
        (prev_start_byte, prev_line_no) = linestarts[0]
        for (start_byte, line_no) in linestarts[1:]:
            while j < start_byte:
                self.lines.append(linetuple(prev_line_no, start_byte))
                j += 1
            last_op = code[self.prev[start_byte]]  # NOTE(review): unused
            (prev_start_byte, prev_line_no) = (start_byte, line_no)
        # pad the table out to the end of the code
        while j < n:
            self.lines.append(linetuple(prev_line_no, n))
            j+=1
        # cf maps a target offset to the list of jump origins reaching
        # it; used below to emit COME_FROM pseudo-tokens.
        cf = self.find_jump_targets(code)
        if classname:
            # Undo Python's private-name mangling for methods of this class.
            classname = '_' + classname.lstrip('_') + '__'
            def unmangle(name):
                if name.startswith(classname) and name[-2:] != '__':
                    return name[len(classname) - 2:]
                return name
            free = [ unmangle(name) for name in (co.co_cellvars + co.co_freevars) ]
            names = [ unmangle(name) for name in co.co_names ]
            varnames = [ unmangle(name) for name in co.co_varnames ]
        else:
            free = co.co_cellvars + co.co_freevars
            names = co.co_names
            varnames = co.co_varnames
        # Mark PRINT_ITEM / PRINT_NEWLINE opcodes that continue a print
        # statement begun on the same source line (*_CONT variants).
        last_stmt = self.next_stmt[0]
        i = self.next_stmt[last_stmt]
        replace = {}
        while i < n-1:
            if self.lines[last_stmt].next > i:
                if code[last_stmt] == PRINT_ITEM:
                    if code[i] == PRINT_ITEM:
                        replace[i] = 'PRINT_ITEM_CONT'
                    elif code[i] == PRINT_NEWLINE:
                        replace[i] = 'PRINT_NEWLINE_CONT'
            last_stmt = i
            i = self.next_stmt[i]
        # Likewise mark IMPORT_NAME opcodes that continue a multi-import
        # ("import a, b") on the same line.
        imports = self.all_instr(0, n, (IMPORT_NAME, IMPORT_FROM, IMPORT_STAR))
        if len(imports) > 1:
            last_import = imports[0]
            for i in imports[1:]:
                if self.lines[last_import].next > i:
                    if code[last_import] == IMPORT_NAME == code[i]:
                        replace[i] = 'IMPORT_NAME_CONT'
                last_import = i
        # Main translation loop: one Token per instruction, preceded by
        # COME_FROM tokens for every jump that lands here.
        extended_arg = 0
        for offset in self.op_range(0, n):
            if offset in cf:
                k = 0
                for j in cf[offset]:
                    rv.append(Token('COME_FROM', None, repr(j),
                                    offset="%s_%d" % (offset, k) ))
                    k += 1
            op = code[offset]
            opname = dis.opname[op]
            oparg = None; pattr = None
            if op >= HAVE_ARGUMENT:
                oparg = code[offset+1] + code[offset+2] * 256 + extended_arg
                extended_arg = 0
                if op == dis.EXTENDED_ARG:
                    # fold into the next instruction's argument
                    extended_arg = oparg * 65536L
                    continue
                if op in dis.hasconst:
                    const = co.co_consts[oparg]
                    if type(const) == types.CodeType:
                        # Nested code object: specialize the load opname
                        # so the grammar can tell lambdas/comprehensions
                        # apart from ordinary function bodies.
                        oparg = const
                        if const.co_name == '<lambda>':
                            assert opname == 'LOAD_CONST'
                            opname = 'LOAD_LAMBDA'
                        elif const.co_name == '<genexpr>':
                            opname = 'LOAD_GENEXPR'
                        elif const.co_name == '<dictcomp>':
                            opname = 'LOAD_DICTCOMP'
                        elif const.co_name == '<setcomp>':
                            opname = 'LOAD_SETCOMP'
                        # verify uses 'pattr' for comparism, since 'attr'
                        # now holds Code(const) and thus can not be used
                        # for comparism (todo: think about changing this)
                        pattr = '<code_object ' + const.co_name + '>'
                    else:
                        pattr = const
                elif op in dis.hasname:
                    pattr = names[oparg]
                elif op in dis.hasjrel:
                    # relative jump: record the absolute destination
                    pattr = repr(offset + 3 + oparg)
                elif op in dis.hasjabs:
                    pattr = repr(oparg)
                elif op in dis.haslocal:
                    pattr = varnames[oparg]
                elif op in dis.hascompare:
                    pattr = dis.cmp_op[oparg]
                elif op in dis.hasfree:
                    pattr = free[oparg]
            if op in (BUILD_LIST, BUILD_TUPLE, BUILD_SET, BUILD_SLICE,
                      UNPACK_SEQUENCE,
                      MAKE_FUNCTION, CALL_FUNCTION, MAKE_CLOSURE,
                      CALL_FUNCTION_VAR, CALL_FUNCTION_KW,
                      CALL_FUNCTION_VAR_KW, DUP_TOPX,
                      ):
                # CE - Hack for >= 2.5
                #      Now all values loaded via LOAD_CLOSURE are packed into
                #      a tuple before calling MAKE_CLOSURE.
                if op == BUILD_TUPLE and \
                   code[offset-3] == LOAD_CLOSURE:
                    continue
                else:
                    # Encode the argument count into the opcode name
                    # (e.g. CALL_FUNCTION_3) and remember it so parse()
                    # can register a matching grammar rule.
                    opname = '%s_%d' % (opname, oparg)
                    if op != BUILD_SLICE:
                        customize[opname] = oparg
            elif op == JA:
                target = self.get_target(offset)
                if target < offset:
                    # backward jump: either a loop's JUMP_BACK or a
                    # 'continue' inside a statement.
                    if offset in self.stmts and code[offset+3] not in (END_FINALLY, POP_BLOCK) \
                       and offset not in self.not_continue:
                        opname = 'CONTINUE'
                    else:
                        opname = 'JUMP_BACK'
            elif op == LOAD_GLOBAL:
                # assert statements load AssertionError right after the test
                try:
                    if pattr == 'AssertionError' and rv and rv[-1] == 'POP_JUMP_IF_TRUE':
                        opname = 'LOAD_ASSERT'
                except AttributeError:
                    pass
            elif op == RETURN_VALUE:
                if offset in self.return_end_ifs:
                    opname = 'RETURN_END_IF'
            if offset not in replace:
                rv.append(Token(opname, oparg, pattr, offset, linestart = offset in linestartoffsets))
            else:
                rv.append(Token(replace[offset], oparg, pattr, offset, linestart = offset in linestartoffsets))
        if self.showasm:
            out = self.out # shortcut
            for t in rv:
                print >>out, t
            print >>out
        return rv, customize
def get_target(self, pos, op=None):
if op is None:
op = self.code[pos]
target = self.code[pos+1] + self.code[pos+2] * 256
if op in dis.hasjrel:
target += pos + 3
return target
    def first_instr(self, start, end, instr, target=None, exact=True):
        """
        Find the first <instr> in the block from start to end.
        <instr> is any python bytecode instruction or a list of opcodes
        If <instr> is an opcode with a target (like a jump), a target
        destination can be specified which must match precisely if exact
        is True, or if exact is False, the instruction which has a target
        closest to <target> will be returned.

        Return index to it or None if not found.
        """
        code = self.code
        assert(start>=0 and end<=len(code))
        # Normalize a single opcode to a one-element list; the membership
        # probe raises TypeError for non-container arguments, which is
        # how scalars are detected.
        try: None in instr
        except: instr = [instr]
        pos = None
        distance = len(code)
        for i in self.op_range(start, end):
            op = code[i]
            if op in instr:
                if target is None:
                    return i
                dest = self.get_target(i, op)
                if dest == target:
                    return i
                elif not exact:
                    # track the jump whose destination is closest to target
                    _distance = abs(target - dest)
                    if _distance < distance:
                        distance = _distance
                        pos = i
        return pos
def last_instr(self, start, end, instr, target=None, exact=True):
    """
    Find the last <instr> in the block from start to end.
    <instr> is any python bytecode instruction or a list of opcodes
    If <instr> is an opcode with a target (like a jump), a target
    destination can be specified which must match precisely if exact
    is True, or if exact is False, the instruction which has a target
    closest to <target> will be returned.
    Return index to it or None if not found.
    """
    code = self.code
    if start < 0 or end > len(code):
        return None
    # accept either a single opcode or any container of opcodes
    try:
        None in instr
    except:
        instr = [instr]
    found = None
    best_distance = len(code)
    for offset in self.op_range(start, end):
        op = code[offset]
        if op not in instr:
            continue
        if target is None:
            found = offset
            continue
        dest = self.get_target(offset, op)
        if dest == target:
            best_distance = 0
            found = offset
        elif not exact:
            gap = abs(target - dest)
            if gap <= best_distance:
                best_distance = gap
                found = offset
    return found
def all_instr(self, start, end, instr, target=None, include_beyond_target=False):
    """
    Find all <instr> in the block from start to end.
    <instr> is any python bytecode instruction or a list of opcodes
    If <instr> is an opcode with a target (like a jump), a target
    destination can be specified which must match precisely.
    Return a list with indexes to them or [] if none found.
    """
    code = self.code
    assert start >= 0 and end <= len(code)
    # accept either a single opcode or any container of opcodes
    try:
        None in instr
    except:
        instr = [instr]
    hits = []
    for offset in self.op_range(start, end):
        op = code[offset]
        if op not in instr:
            continue
        if target is None:
            hits.append(offset)
            continue
        dest = self.get_target(offset, op)
        if (include_beyond_target and dest >= target) or dest == target:
            hits.append(offset)
    return hits
def op_size(self, op):
    """Return the byte length of an instruction: 3 if it carries an
    oparg, 1 otherwise."""
    return 1 if op < HAVE_ARGUMENT else 3
def op_range(self, start, end):
    """Yield the offset of each instruction whose first byte lies in
    [start, end), stepping by each instruction's size."""
    offset = start
    while offset < end:
        yield offset
        offset += self.op_size(self.code[offset])
def build_stmt_indices(self):
    """
    Compute self.stmts, the set of offsets that begin a statement, and
    self.next_stmt, a table mapping every code offset to the offset of
    the next statement start (or len(code) past the last one).
    """
    code = self.code
    start = 0;
    end = len(code)
    # opcodes that (usually) terminate or begin a statement
    stmt_opcodes = {
        SETUP_LOOP, BREAK_LOOP, CONTINUE_LOOP,
        SETUP_FINALLY, END_FINALLY, SETUP_EXCEPT, SETUP_WITH,
        POP_BLOCK, STORE_FAST, DELETE_FAST, STORE_DEREF,
        STORE_GLOBAL, DELETE_GLOBAL, STORE_NAME, DELETE_NAME,
        STORE_ATTR, DELETE_ATTR, STORE_SUBSCR, DELETE_SUBSCR,
        RETURN_VALUE, RAISE_VARARGS, POP_TOP,
        PRINT_EXPR, PRINT_ITEM, PRINT_NEWLINE, PRINT_ITEM_TO, PRINT_NEWLINE_TO,
        STORE_SLICE_0, STORE_SLICE_1, STORE_SLICE_2, STORE_SLICE_3,
        DELETE_SLICE_0, DELETE_SLICE_1, DELETE_SLICE_2, DELETE_SLICE_3,
        JUMP_ABSOLUTE, EXEC_STMT,
    }
    # a conditional jump immediately followed by an unconditional one
    # also marks a statement boundary
    stmt_opcode_seqs = [(PJIF, JF), (PJIF, JA), (PJIT, JF), (PJIT, JA)]
    # opcodes that store into an assignment target
    designator_ops = {
        STORE_FAST, STORE_NAME, STORE_GLOBAL, STORE_DEREF, STORE_ATTR,
        STORE_SLICE_0, STORE_SLICE_1, STORE_SLICE_2, STORE_SLICE_3,
        STORE_SUBSCR, UNPACK_SEQUENCE, JA
    }
    prelim = self.all_instr(start, end, stmt_opcodes)
    stmts = self.stmts = set(prelim)
    pass_stmts = set()
    # scan for the two-opcode sequences; on a match, record the offset
    # of the instruction preceding the sequence
    for seq in stmt_opcode_seqs:
        for i in self.op_range(start, end-(len(seq)+1)):
            match = True
            for elem in seq:
                if elem != code[i]:
                    match = False
                    break
                i += self.op_size(code[i])
            if match:
                i = self.prev[i]
                stmts.add(i)
                pass_stmts.add(i)
    if pass_stmts:
        stmt_list = list(stmts)
        stmt_list.sort()
    else:
        stmt_list = prelim
    last_stmt = -1
    self.next_stmt = []
    slist = self.next_stmt = []
    i = 0
    # prune offsets that do not really start a statement, and fill the
    # next-statement table as we go
    for s in stmt_list:
        if code[s] == JA and s not in pass_stmts:
            target = self.get_target(s)
            # forward jumps and jumps on the same source line are not
            # `continue` statements
            if target > s or self.lines[last_stmt].l_no == self.lines[s].l_no:
                stmts.remove(s)
                continue
            j = self.prev[s]
            while code[j] == JA:
                j = self.prev[j]
            if code[j] == LIST_APPEND: #list comprehension
                stmts.remove(s)
                continue
        elif code[s] == POP_TOP and code[self.prev[s]] == ROT_TWO:
            stmts.remove(s)
            continue
        elif code[s] in designator_ops:
            # stores fed directly by FOR_ITER belong to the loop header,
            # not to a standalone statement
            j = self.prev[s]
            while code[j] in designator_ops:
                j = self.prev[j]
            if code[j] == FOR_ITER:
                stmts.remove(s)
                continue
        last_stmt = s
        slist += [s] * (s-i)
        i = s
    slist += [len(code)] * (len(code)-len(slist))
def remove_mid_line_ifs(self, ifs):
    """Filter a list of conditional-jump offsets, dropping those that
    sit mid-line: a test on the same source line as its successor whose
    line ends with another conditional jump."""
    kept = []
    for i in ifs:
        same_line = self.lines[i].l_no == self.lines[i+3].l_no
        if same_line and self.code[self.prev[self.lines[i].next]] in (PJIT, PJIF):
            continue
        kept.append(i)
    return kept
def rem_or(self, start, end, instr, target=None, include_beyond_target=False):
    """
    Find all <instr> in the block from start to end, then discard any
    match that falls strictly inside the span of a JUMP_IF_TRUE (PJIT)
    test, since those belong to an `or` sub-expression rather than the
    construct being analysed.

    <instr> is any python bytecode instruction or a list of opcodes
    If <instr> is an opcode with a target (like a jump), a target
    destination can be specified which must match precisely.
    Return a list with indexes to them or [] if none found.
    """
    code = self.code
    assert(start>=0 and end<=len(code))
    # accept either a single opcode or any container of opcodes
    try: None in instr
    except: instr = [instr]
    result = []
    for i in self.op_range(start, end):
        op = code[i]
        if op in instr:
            if target is None:
                result.append(i)
            else:
                t = self.get_target(i, op)
                if include_beyond_target and t >= target:
                    result.append(i)
                elif t == target:
                    result.append(i)
    # Successively narrow the matches: for each PJIT keep only offsets
    # outside the open interval (pjit, jump_target - 3).
    # (The original code ended with a dead `filtered = []` assignment,
    # removed here.)
    for pjit in self.all_instr(start, end, PJIT):
        tgt = self.get_target(pjit) - 3
        result = [i for i in result if i <= pjit or i >= tgt]
    return result
def next_except_jump(self, start):
    """
    Return the next jump that was generated by an except SomeException:
    construct in a try...except...else clause or None if not found.
    """
    # the usual case: the exception test jumps on the same source line
    except_match = self.first_instr(start, self.lines[start].next, POP_JUMP_IF_FALSE)
    if except_match:
        jmp = self.prev[self.get_target(except_match)]
        self.ignore_if.add(except_match)
        return jmp
    # otherwise scan forward for the END_FINALLY that closes this
    # handler, balancing any nested SETUP_* blocks along the way
    count_END_FINALLY = 0
    count_SETUP_ = 0
    for i in self.op_range(start, len(self.code)):
        op = self.code[i]
        if op == END_FINALLY:
            if count_END_FINALLY == count_SETUP_:
                assert self.code[self.prev[i]] in (JA, JF, RETURN_VALUE)
                return self.prev[i]
            count_END_FINALLY += 1
        elif op in (SETUP_EXCEPT, SETUP_WITH, SETUP_FINALLY):
            count_SETUP_ += 1
def restrict_to_parent(self, target, parent):
    """Clamp a jump target to its parent structure: return `target` when
    it lies strictly inside the parent's boundaries, otherwise the
    parent's end offset."""
    inside = parent['start'] < target < parent['end']
    return target if inside else parent['end']
def detect_structure(self, pos, op=None):
    """
    Detect type of block structures and their boundaries to fix optimizied jumps
    in python2.3+

    Appends entries to self.structs and records corrected jump
    destinations in self.fixed_jumps as side effects.
    """
    # TODO: check the struct boundaries more precisely -Dan
    code = self.code
    # Ev remove this test and make op a mandatory argument -Dan
    if op is None:
        op = code[pos]
    ## Detect parent structure
    # pick the innermost already-recorded structure containing pos
    parent = self.structs[0]
    start = parent['start']
    end = parent['end']
    for s in self.structs:
        _start = s['start']
        _end = s['end']
        if (_start <= pos < _end) and (_start >= start and _end <= end):
            start = _start
            end = _end
            parent = s
    ## We need to know how many new structures were added in this run
    origStructCount = len(self.structs)
    if op == SETUP_LOOP:
        #import pdb; pdb.set_trace()
        start = pos+3
        target = self.get_target(pos, op)
        end = self.restrict_to_parent(target, parent)
        if target != end:
            self.fixed_jumps[pos] = end
        (line_no, next_line_byte) = self.lines[pos]
        jump_back = self.last_instr(start, end, JA,
                                    next_line_byte, False)
        if not jump_back: # loop suite ends in return. wtf right?
            jump_back = self.last_instr(start, end, RETURN_VALUE) + 1
            if not jump_back:
                return
            if code[self.prev[next_line_byte]] not in (PJIF, PJIT):
                loop_type = 'for'
            else:
                loop_type = 'while'
                self.ignore_if.add(self.prev[next_line_byte])
            target = next_line_byte
            end = jump_back + 3
        else:
            if self.get_target(jump_back) >= next_line_byte:
                jump_back = self.last_instr(start, end, JA,
                                            start, False)
            # peephole-optimized loop exit: the jump right after the
            # back-jump goes to the same place as the one at `end`
            if end > jump_back+4 and code[end] in (JF, JA):
                if code[jump_back+4] in (JA, JF):
                    if self.get_target(jump_back+4) == self.get_target(end):
                        self.fixed_jumps[pos] = jump_back+4
                        end = jump_back+4
            elif target < pos:
                self.fixed_jumps[pos] = jump_back+4
                end = jump_back+4
            target = self.get_target(jump_back, JA)
            if code[target] in (FOR_ITER, GET_ITER):
                loop_type = 'for'
            else:
                loop_type = 'while'
                test = self.prev[next_line_byte]
                if test == pos:
                    # no separate test instruction: `while 1`
                    loop_type = 'while 1'
                else:
                    self.ignore_if.add(test)
                    test_target = self.get_target(test)
                    if test_target > (jump_back+3):
                        jump_back = test_target
            self.loops.append(target)
        self.structs.append({'type': loop_type + '-loop',
                             'start': target,
                             'end':   jump_back})
        if jump_back+3 != end: # a `loop-else` block follows the loop
            self.structs.append({'type': loop_type + '-else',
                                 'start': jump_back+3,
                                 'end':   end})
    elif op == SETUP_EXCEPT:
        start = pos+3
        target = self.get_target(pos, op)
        end = self.restrict_to_parent(target, parent)
        if target != end:
            self.fixed_jumps[pos] = end
        #print target, end, parent
        ## Add the try block
        self.structs.append({'type':  'try',
                             'start': start,
                             'end':   end-4})
        ## Now isolate the except and else blocks
        end_else = start_else = self.get_target(self.prev[end])
        ## Add the except blocks
        i = end
        while self.code[i] != END_FINALLY:
            jmp = self.next_except_jump(i)
            if self.code[jmp] == RETURN_VALUE:
                self.structs.append({'type':  'except',
                                     'start': i,
                                     'end':   jmp+1})
                i = jmp + 1
            else:
                if self.get_target(jmp) != start_else:
                    end_else = self.get_target(jmp)
                if self.code[jmp] == JF:
                    self.fixed_jumps[jmp] = -1
                self.structs.append({'type':  'except',
                                     'start': i,
                                     'end':   jmp})
                i = jmp + 3
        ## Add the try-else block
        if end_else != start_else:
            r_end_else = self.restrict_to_parent(end_else, parent)
            self.structs.append({'type':  'try-else',
                                 'start': i+1,
                                 'end':   r_end_else})
            self.fixed_jumps[i] = r_end_else
        else:
            self.fixed_jumps[i] = i+1
    elif op in (PJIF, PJIT):
        start = pos+3
        target = self.get_target(pos, op)
        rtarget = self.restrict_to_parent(target, parent)
        pre = self.prev
        if target != rtarget and parent['type'] == 'and/or':
            self.fixed_jumps[pos] = rtarget
            return
        #does this jump to right after another cond jump?
        # if so, it's part of a larger conditional
        if (code[pre[target]] in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP,
                                  PJIF, PJIT)) and (target > pos):
            self.fixed_jumps[pos] = pre[target]
            self.structs.append({'type':  'and/or',
                                 'start': start,
                                 'end':   pre[target]})
            return
        # is this an if and
        if op == PJIF:
            match = self.rem_or(start, self.next_stmt[pos], PJIF, target)
            match = self.remove_mid_line_ifs(match)
            if match:
                if code[pre[rtarget]] in (JF, JA) \
                        and pre[rtarget] not in self.stmts \
                        and self.restrict_to_parent(self.get_target(pre[rtarget]), parent) == rtarget:
                    if code[pre[pre[rtarget]]] == JA \
                            and self.remove_mid_line_ifs([pos]) \
                            and target == self.get_target(pre[pre[rtarget]]) \
                            and (pre[pre[rtarget]] not in self.stmts or self.get_target(pre[pre[rtarget]]) > pre[pre[rtarget]])\
                            and 1 == len(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], (PJIF, PJIT), target))):
                        pass
                    elif code[pre[pre[rtarget]]] == RETURN_VALUE \
                            and self.remove_mid_line_ifs([pos]) \
                            and 1 == (len(set(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], \
                                                             (PJIF, PJIT), target))) \
                                      | set(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], \
                                                           (PJIF, PJIT, JA), pre[rtarget], True))))):
                        pass
                    else:
                        # several tests share this target: pick the last
                        # one that still looks like a plain `if`
                        fix = None
                        jump_ifs = self.all_instr(start, self.next_stmt[pos], PJIF)
                        last_jump_good = True
                        for j in jump_ifs:
                            if target == self.get_target(j):
                                if self.lines[j].next == j+3 and last_jump_good:
                                    fix = j
                                    break
                                else:
                                    last_jump_good = False
                        self.fixed_jumps[pos] = fix or match[-1]
                        return
                else:
                    self.fixed_jumps[pos] = match[-1]
                    return
        else: # op == PJIT
            next = self.next_stmt[pos]
            if pre[next] == pos:
                pass
            elif code[next] in (JF, JA) and target == self.get_target(next):
                if code[pre[next]] == PJIF:
                    if code[next] == JF or target != rtarget or code[pre[pre[rtarget]]] not in (JA, RETURN_VALUE):
                        self.fixed_jumps[pos] = pre[next]
                        return
            elif code[next] == JA and code[target] in (JA, JF) \
                    and self.get_target(target) == self.get_target(next):
                self.fixed_jumps[pos] = pre[next]
                return
        #don't add a struct for a while test, it's already taken care of
        if pos in self.ignore_if:
            return
        if code[pre[rtarget]] == JA and pre[rtarget] in self.stmts \
                and pre[rtarget] != pos and pre[pre[rtarget]] != pos \
                and not (code[rtarget] == JA and code[rtarget+3] == POP_BLOCK and code[pre[pre[rtarget]]] != JA):
            rtarget = pre[rtarget]
        #does the if jump just beyond a jump op, then this is probably an if statement
        if code[pre[rtarget]] in (JA, JF):
            if_end = self.get_target(pre[rtarget])
            #is this a loop not an if?
            if (if_end < pre[rtarget]) and (code[pre[if_end]] == SETUP_LOOP):
                if(if_end > start):
                    return
            end = self.restrict_to_parent(if_end, parent)
            self.structs.append({'type':  'if-then',
                                 'start': start,
                                 'end':   pre[rtarget]})
            self.not_continue.add(pre[rtarget])
            if rtarget < end:
                self.structs.append({'type':  'if-else',
                                     'start': rtarget,
                                     'end':   end})
        elif code[pre[rtarget]] == RETURN_VALUE:
            # the branch ends in a return: `if ...: return ...`
            self.structs.append({'type':  'if-then',
                                 'start': start,
                                 'end':   rtarget})
            self.return_end_ifs.add(pre[rtarget])
    elif op in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP):
        target = self.get_target(pos, op)
        if target > pos:
            unop_target = self.last_instr(pos, target, JF, target)
            if unop_target and code[unop_target+3] != ROT_TWO:
                self.fixed_jumps[pos] = unop_target
            else:
                self.fixed_jumps[pos] = self.restrict_to_parent(target, parent)
def find_jump_targets(self, code):
    """
    Detect all offsets in a byte code which are jump targets.

    Return a dict mapping each target offset to the list of offsets
    of the instructions that jump to it.

    This procedure is modelled after dis.findlabels(), but here
    for each target the number of jumps are counted.
    """
    hasjrel = dis.hasjrel
    hasjabs = dis.hasjabs
    n = len(code)
    # reset the per-code-object analysis state
    self.structs = [{'type':  'root',
                     'start': 0,
                     'end':   n-1}]
    self.loops = []  ## All loop entry points
    self.fixed_jumps = {} ## Map fixed jumps to their real destination
    self.ignore_if = set()
    self.build_stmt_indices()
    self.not_continue = set()
    self.return_end_ifs = set()
    targets = {}
    for i in self.op_range(0, n):
        op = code[i]
        ## Determine structures and fix jumps for 2.3+
        self.detect_structure(i, op)
        if op >= HAVE_ARGUMENT:
            label = self.fixed_jumps.get(i)
            oparg = code[i+1] + code[i+2] * 256
            if label is None:
                if op in hasjrel and op != FOR_ITER:
                    label = i + 3 + oparg
                elif op in hasjabs:
                    # only forward or-/and-jumps are recorded here
                    if op in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP):
                        if (oparg > i):
                            label = oparg
            # -1 marks a jump deliberately suppressed by detect_structure
            if label is not None and label != -1:
                targets[label] = targets.get(label, []) + [i]
        elif op == END_FINALLY and i in self.fixed_jumps:
            label = self.fixed_jumps[i]
            targets[label] = targets.get(label, []) + [i]
    return targets

View File

@@ -0,0 +1,945 @@
# Copyright (c) 1999 John Aycock
# Copyright (c) 2000-2002 by hartmut Goebel <hartmut@goebel.noris.de>
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
#
# See main module for license.
#
__all__ = ['Token', 'Scanner', 'getscanner']
import types
import disas as dis
from collections import namedtuple
from array import array
from operator import itemgetter
from struct import *
from Scanner import Token, Code
class Scanner:
def __init__(self, version):
    """Configure the scanner for the given bytecode `version` and
    publish the version's opcode numbers as module-level constants so
    the other methods can refer to them by name."""
    self.version = version
    self.resetTokenClass()
    dis.setVersion(version)
    module_ns = globals()
    module_ns['HAVE_ARGUMENT'] = dis.HAVE_ARGUMENT
    for mnemonic, opcode in dis.opmap.items():
        module_ns[mnemonic.replace('+', '_')] = opcode
    # short aliases for the jump opcodes used throughout the scanner
    module_ns['PJIF'] = dis.opmap['JUMP_IF_FALSE']
    module_ns['PJIT'] = dis.opmap['JUMP_IF_TRUE']
    module_ns['JA'] = dis.opmap['JUMP_ABSOLUTE']
    module_ns['JF'] = dis.opmap['JUMP_FORWARD']
    self.JUMP_OPs = map(lambda op: dis.opname[op],
                        dis.hasjrel + dis.hasjabs)
def setShowAsm(self, showasm, out=None):
    """Record whether the disassembly listing should be printed, and
    the stream it should be printed to (None means stdout)."""
    self.out = out
    self.showasm = showasm
def setTokenClass(self, tokenClass):
    """Install `tokenClass` as the factory used to build tokens; only
    classic (old-style) classes are accepted."""
    assert types.ClassType == type(tokenClass)
    self.Token = tokenClass
def resetTokenClass(self):
    """Reinstall the default Token class imported from Scanner."""
    self.setTokenClass(Token)
def disassemble(self, co, classname=None):
    """
    Disassemble a code object, returning a list of 'Token'.
    The main part of this procedure is modelled after
    dis.disassemble().

    Before tokenizing, the 2.5/2.6 bytecode is rewritten in place into
    a pseudo-2.7 form: obsolete POP_TOP/DUP_TOP artifacts are deleted
    and relative conditional jumps are converted to absolute ones.
    """
    rv = []
    customize = {}
    Token = self.Token # shortcut
    self.code = array('B', co.co_code)
    n = len(self.code)
    # linestarts: list of (code offset, source line number) pairs
    self.linestarts = list(dis.findlinestarts(co))
    self.prev = [0]
    pop_delet = 0
    i=0
    self.restructRelativeJump()
    # class and names
    if classname:
        # undo the private-name mangling applied inside a class body
        classname = '_' + classname.lstrip('_') + '__'
        def unmangle(name):
            if name.startswith(classname) and name[-2:] != '__':
                return name[len(classname) - 2:]
            return name
        free = [ unmangle(name) for name in (co.co_cellvars + co.co_freevars) ]
        names = [ unmangle(name) for name in co.co_names ]
        varnames = [ unmangle(name) for name in co.co_varnames ]
    else:
        free = co.co_cellvars + co.co_freevars
        names = co.co_names
        varnames = co.co_varnames
    self.names = names
    # collect the offsets of instructions to remove in "toDel"
    toDel = []
    while i < n-pop_delet:
        op = self.code[i]
        ret = self.getOpcodeToDel(i)
        if ret != None:
            toDel += ret
        if op >= dis.HAVE_ARGUMENT:
            i += 2
        i += 1
    if toDel: # ugly, to revisit/rethink (do it all in one pass? tricky)
        toDel = sorted(list(set(toDel)))
        delta = 0
        # pop each doomed instruction byte-by-byte, re-fixing the jump
        # targets and linestarts after every removed byte
        for x in toDel:
            if self.code[x-delta] >= dis.HAVE_ARGUMENT:
                self.code.pop(x-delta)
                self.restructCode(x-delta)
                self.code.pop(x-delta)
                self.restructCode(x-delta)
                self.code.pop(x-delta)
                self.restructCode(x-delta)
                delta += 3
            else:
                self.code.pop(x-delta)
                self.restructCode(x-delta)
                delta += 1
    # map each code offset to the offset of the previous instruction
    n = len(self.code)
    for i in self.op_range(0, n):
        op = self.code[i]
        self.prev.append(i)
        if op >= HAVE_ARGUMENT:
            self.prev.append(i)
            self.prev.append(i)
    # build self.lines: for every offset, its source line number and
    # the offset where the next source line starts
    j = 0
    linestarts = self.linestarts
    self.lines = []
    linetuple = namedtuple('linetuple', ['l_no', 'next'])
    linestartoffsets = {a for (a, _) in linestarts}
    (prev_start_byte, prev_line_no) = linestarts[0]
    for (start_byte, line_no) in linestarts[1:]:
        while j < start_byte:
            self.lines.append(linetuple(prev_line_no, start_byte))
            j += 1
        last_op = self.code[self.prev[start_byte]]
        (prev_start_byte, prev_line_no) = (start_byte, line_no)
    while j < n:
        self.lines.append(linetuple(prev_line_no, n))
        j+=1
    # self.lines contains (line number, offset of next line start)
    cf = self.find_jump_targets(self.code)
    # cf maps target offset -> [offsets of instructions jumping there]
    # mark the continuation parts of multi-item print statements
    last_stmt = self.next_stmt[0]
    i = self.next_stmt[last_stmt]
    replace = {}
    while i < n-1:
        if self.lines[last_stmt].next > i:
            if self.code[last_stmt] == PRINT_ITEM:
                if self.code[i] == PRINT_ITEM:
                    replace[i] = 'PRINT_ITEM_CONT'
                elif self.code[i] == PRINT_NEWLINE:
                    replace[i] = 'PRINT_NEWLINE_CONT'
        last_stmt = i
        i = self.next_stmt[i]
    # likewise for `import a, b` which compiles to several IMPORT_NAMEs
    imports = self.all_instr(0, n, (IMPORT_NAME, IMPORT_FROM, IMPORT_STAR))
    if len(imports) > 1:
        last_import = imports[0]
        for i in imports[1:]:
            if self.lines[last_import].next > i:
                if self.code[last_import] == IMPORT_NAME == self.code[i]:
                    replace[i] = 'IMPORT_NAME_CONT'
            last_import = i
    # main tokenizing loop
    extended_arg = 0
    for offset in self.op_range(0, n):
        if offset in cf:
            k = 0
            for j in cf[offset]:
                rv.append(Token('COME_FROM', None, repr(j),
                                offset="%s_%d" % (offset, k) ))
                k += 1
        op = self.code[offset]
        opname = dis.opname[op]
        oparg = None; pattr = None
        if op >= HAVE_ARGUMENT:
            oparg = self.get_argument(offset) + extended_arg
            extended_arg = 0
            if op == dis.EXTENDED_ARG:
                extended_arg = oparg * 65536L
                continue
            if op in dis.hasconst:
                const = co.co_consts[oparg]
                if type(const) == types.CodeType:
                    oparg = const
                    if const.co_name == '<lambda>':
                        assert opname == 'LOAD_CONST'
                        opname = 'LOAD_LAMBDA'
                    elif const.co_name == '<genexpr>':
                        opname = 'LOAD_GENEXPR'
                    elif const.co_name == '<dictcomp>':
                        opname = 'LOAD_DICTCOMP'
                    elif const.co_name == '<setcomp>':
                        opname = 'LOAD_SETCOMP'
                    # verify uses 'pattr' for comparism, since 'attr'
                    # now holds Code(const) and thus can not be used
                    # for comparism (todo: think about changing this)
                    #pattr = 'code_object @ 0x%x %s->%s' %\
                    #	(id(const), const.co_filename, const.co_name)
                    pattr = '<code_object ' + const.co_name + '>'
                else:
                    pattr = const
            elif op in dis.hasname:
                pattr = names[oparg]
            elif op in dis.hasjrel:
                pattr = repr(offset + 3 + oparg)
            elif op in dis.hasjabs:
                pattr = repr(oparg)
            elif op in dis.haslocal:
                pattr = varnames[oparg]
            elif op in dis.hascompare:
                pattr = dis.cmp_op[oparg]
            elif op in dis.hasfree:
                pattr = free[oparg]
        if op in (BUILD_LIST, BUILD_TUPLE, BUILD_SLICE,
                  UNPACK_SEQUENCE,
                  MAKE_FUNCTION, CALL_FUNCTION, MAKE_CLOSURE,
                  CALL_FUNCTION_VAR, CALL_FUNCTION_KW,
                  CALL_FUNCTION_VAR_KW, DUP_TOPX,
                  ):
            # CE - Hack for >= 2.5
            # Now all values loaded via LOAD_CLOSURE are packed into
            # a tuple before calling MAKE_CLOSURE.
            if op == BUILD_TUPLE and \
                self.code[offset-3] == LOAD_CLOSURE:
                continue
            else:
                # encode the argument count into the token name
                opname = '%s_%d' % (opname, oparg)
                if op != BUILD_SLICE:
                    customize[opname] = oparg
        elif op == JA:
            target = self.get_target(offset)
            if target < offset:
                # backward jump: either a `continue` or the loop's
                # normal back edge
                if offset in self.stmts and self.code[offset+3] not in (END_FINALLY, POP_BLOCK) \
                 and offset not in self.not_continue:
                    opname = 'CONTINUE'
                else:
                    opname = 'JUMP_BACK'
        elif op == LOAD_GLOBAL:
            try:
                if pattr == 'AssertionError' and rv and rv[-1] == 'JUMP_IF_TRUE':
                    opname = 'LOAD_ASSERT'
            except AttributeError:
                pass
        elif op == RETURN_VALUE:
            if offset in self.return_end_ifs:
                opname = 'RETURN_END_IF'
        if offset not in replace:
            rv.append(Token(opname, oparg, pattr, offset, linestart = offset in linestartoffsets))
        else:
            rv.append(Token(replace[offset], oparg, pattr, offset, linestart = offset in linestartoffsets))
    if self.showasm:
        out = self.out # shortcut
        for t in rv:
            print >>out, t
        print >>out
    return rv, customize
def getOpcodeToDel(self, i):
    """
    Check validity of the opcode at position I and return a list of
    code offsets to delete (artifacts of the 2.5/2.6 opcodes that have
    no pseudo-2.7 equivalent), or None when nothing must be removed.

    Fix: the original used string exceptions (`raise 'A faire'`), which
    raise a bare TypeError on the targeted Python 2.7 runtime and lose
    the marker text; real exception objects are raised instead.
    """
    opcode = self.code[i]
    opsize = self.op_size(opcode)
    if opcode == EXTENDED_ARG:
        raise NotImplementedError('A faire')  # TODO: handle EXTENDED_ARG
    if opcode in (PJIF,PJIT,JA,JF):
        # a POP_TOP following a jump is an artifact of the old opcodes
        if self.code[i+opsize] == POP_TOP:
            if self.code[i+opsize] == self.code[i+opsize+1] and self.code[i+opsize] == self.code[i+opsize+2] \
                and opcode in (JF,JA) and self.code[i+opsize] != self.code[i+opsize+3]:
                pass
            else:
                return [i+opsize]
    if opcode == RAISE_VARARGS:
        if self.code[i+opsize] == POP_TOP:
            return [i+opsize]
    if opcode == BUILD_LIST:
        # 2.5/2.6 list comprehensions keep the growing list in a
        # temporary variable; the whole store/load/delete dance must go
        if self.code[i+opsize] == DUP_TOP and self.code[i+opsize+1] in (STORE_NAME,STORE_FAST):
            # del DUP/STORE_NAME x
            toDel = [i+opsize,i+opsize+1]
            nameDel = self.get_argument(i+opsize+1)
            start = i+opsize+1
            end = start
            # del LOAD_NAME x
            while end < len(self.code):
                end = self.first_instr(end, len(self.code), (LOAD_NAME,LOAD_FAST))
                if nameDel == self.get_argument(end):
                    toDel += [end]
                    break
                if self.code[end] == LOAD_NAME:
                    end += self.op_size(LOAD_NAME)
                else:
                    end += self.op_size(LOAD_FAST)
            # log JA/POP_TOP to del and update PJIF
            while start < end:
                start = self.first_instr(start, len(self.code), (PJIF))
                if start == None: break
                target = self.get_target(start)
                if self.code[target] == POP_TOP and self.code[target-3] == JA:
                    toDel += [target, target-3]
                    # update PJIF
                    target = self.get_target(target-3)
                    if target > 0xFFFF:
                        # destination does not fit in a 2-byte oparg
                        raise NotImplementedError('A gerer')
                    self.code[start+1] = target & 0xFF
                    self.code[start+2] = (target >> 8) & 0xFF
                start += self.op_size(PJIF)
            # del DELETE_NAME x
            while end < len(self.code):
                end = self.first_instr(end, len(self.code), (DELETE_NAME,DELETE_FAST))
                if nameDel == self.get_argument(end):
                    toDel += [end]
                    break
                if self.code[end] == DELETE_NAME:
                    end += self.op_size(DELETE_NAME)
                else:
                    end += self.op_size(DELETE_FAST)
            return toDel
    return None
def restructRelativeJump(self):
    """
    Change relative JUMP_IF_FALSE/TRUE to absolute jumps, and remap the
    target of a PJIF/PJIT that lands on a JUMP_ABSOLUTE so it points at
    the final destination directly.

    Fix: the original used a string exception (`raise 'A gerer'`), which
    raises a bare TypeError on the targeted Python 2.7 runtime; a real
    exception is raised instead.
    """
    # pass 1: convert the relative offset to an absolute one in place
    for i in self.op_range(0, len(self.code)):
        if(self.code[i] in (PJIF,PJIT)):
            target = self.get_argument(i)
            target += i + 3
            if target > 0xFFFF:
                # destination does not fit in a 2-byte oparg
                raise NotImplementedError('A gerer')
            self.code[i+1] = target & 0xFF
            self.code[i+2] = (target >> 8) & 0xFF
    # pass 2: jump-to-jump threading through JUMP_ABSOLUTE
    for i in self.op_range(0, len(self.code)):
        if(self.code[i] in (PJIF,PJIT)):
            target = self.get_target(i)
            if self.code[target] == JA:
                target = self.get_target(target)
                if target > 0xFFFF:
                    raise NotImplementedError('A gerer')
                self.code[i+1] = target & 0xFF
                self.code[i+2] = (target >> 8) & 0xFF
def restructCode(self, i):
    """
    Re-synchronize line-start offsets and jump destinations after one
    byte has been removed at offset `i`.
    """
    # shift every linestart located after the removal point back by one
    self.linestarts = [(addr - 1, line) if i < addr else (addr, line)
                       for (addr, line) in self.linestarts]
    # decrement the 16-bit operand of every jump that crosses offset i
    for x in self.op_range(0, len(self.code)):
        op = self.code[x]
        if op < HAVE_ARGUMENT:
            continue
        crosses = False
        if op in dis.hasjrel:
            crosses = x < i and self.get_target(x) > i
        elif op in dis.hasjabs:
            crosses = i < self.get_target(x)
        if crosses:
            if self.code[x+1] == 0:
                # borrow from the high byte
                self.code[x+2] -= 1
                self.code[x+1] = 255
            else:
                self.code[x+1] -= 1
def get_target(self, pos, op=None):
    """Return the destination offset encoded by the jump at `pos`;
    relative jumps are converted to absolute offsets."""
    opcode = self.code[pos] if op is None else op
    dest = self.get_argument(pos)
    return dest + pos + 3 if opcode in dis.hasjrel else dest
def get_argument(self, pos):
    """Return the 16-bit little-endian oparg of the instruction at
    `pos` (low byte first)."""
    return self.code[pos + 1] + self.code[pos + 2] * 256
def first_instr(self, start, end, instr, target=None, exact=True):
    """
    Find the first <instr> in the block from start to end.
    <instr> is any python bytecode instruction or a list of opcodes
    If <instr> is an opcode with a target (like a jump), a target
    destination can be specified which must match precisely if exact
    is True, or if exact is False, the instruction which has a target
    closest to <target> will be returned.
    Return index to it or None if not found.
    """
    code = self.code
    assert start >= 0 and end <= len(code)
    # accept either a single opcode or any container of opcodes
    try:
        None in instr
    except:
        instr = [instr]
    best_pos = None
    best_distance = len(code)
    for offset in self.op_range(start, end):
        op = code[offset]
        if op not in instr:
            continue
        if target is None:
            return offset
        dest = self.get_target(offset, op)
        if dest == target:
            return offset
        if not exact:
            gap = abs(target - dest)
            if gap < best_distance:
                best_distance = gap
                best_pos = offset
    return best_pos
def last_instr(self, start, end, instr, target=None, exact=True):
    """
    Find the last <instr> in the block from start to end.
    <instr> is any python bytecode instruction or a list of opcodes
    If <instr> is an opcode with a target (like a jump), a target
    destination can be specified which must match precisely if exact
    is True, or if exact is False, the instruction which has a target
    closest to <target> will be returned.
    Return index to it or None if not found.
    """
    code = self.code
    if start < 0 or end > len(code):
        return None
    # accept either a single opcode or any container of opcodes
    try:
        None in instr
    except:
        instr = [instr]
    found = None
    best_distance = len(code)
    for offset in self.op_range(start, end):
        op = code[offset]
        if op not in instr:
            continue
        if target is None:
            found = offset
            continue
        dest = self.get_target(offset, op)
        if dest == target:
            best_distance = 0
            found = offset
        elif not exact:
            gap = abs(target - dest)
            if gap <= best_distance:
                best_distance = gap
                found = offset
    return found
def all_instr(self, start, end, instr, target=None, include_beyond_target=False):
    """
    Find all <instr> in the block from start to end.
    <instr> is any python bytecode instruction or a list of opcodes
    If <instr> is an opcode with a target (like a jump), a target
    destination can be specified which must match precisely.
    Return a list with indexes to them or [] if none found.
    """
    code = self.code
    assert start >= 0 and end <= len(code)
    # accept either a single opcode or any container of opcodes
    try:
        None in instr
    except:
        instr = [instr]
    hits = []
    for offset in self.op_range(start, end):
        op = code[offset]
        if op not in instr:
            continue
        if target is None:
            hits.append(offset)
            continue
        dest = self.get_target(offset, op)
        if (include_beyond_target and dest >= target) or dest == target:
            hits.append(offset)
    return hits
def op_size(self, op):
    """Return the byte length of an instruction: 3 if it carries an
    oparg, 1 otherwise."""
    return 1 if op < HAVE_ARGUMENT else 3
def op_range(self, start, end):
    """Yield the offset of each instruction whose first byte lies in
    [start, end), stepping by each instruction's size."""
    offset = start
    while offset < end:
        yield offset
        offset += self.op_size(self.code[offset])
def build_stmt_indices(self):
    """
    Compute self.stmts, the set of offsets that begin a statement, and
    self.next_stmt, a table mapping every code offset to the offset of
    the next statement start (or len(code) past the last one).
    """
    code = self.code
    start = 0;
    end = len(code)
    # opcodes that (usually) terminate or begin a statement
    stmt_opcodes = {
        SETUP_LOOP, BREAK_LOOP, CONTINUE_LOOP,
        SETUP_FINALLY, END_FINALLY, SETUP_EXCEPT,
        POP_BLOCK, STORE_FAST, DELETE_FAST, STORE_DEREF,
        STORE_GLOBAL, DELETE_GLOBAL, STORE_NAME, DELETE_NAME,
        STORE_ATTR, DELETE_ATTR, STORE_SUBSCR, DELETE_SUBSCR,
        RETURN_VALUE, RAISE_VARARGS, POP_TOP,
        PRINT_EXPR, PRINT_ITEM, PRINT_NEWLINE, PRINT_ITEM_TO, PRINT_NEWLINE_TO,
        JUMP_ABSOLUTE, EXEC_STMT,
    }
    # a conditional jump immediately followed by an unconditional one
    # also marks a statement boundary
    stmt_opcode_seqs = [(PJIF, JF), (PJIF, JA), (PJIT, JF), (PJIT, JA)]
    # opcodes that store into an assignment target
    designator_ops = {
        STORE_FAST, STORE_NAME, STORE_GLOBAL, STORE_DEREF, STORE_ATTR,
        STORE_SLICE_0, STORE_SLICE_1, STORE_SLICE_2, STORE_SLICE_3,
        STORE_SUBSCR, UNPACK_SEQUENCE, JA
    }
    prelim = self.all_instr(start, end, stmt_opcodes)
    stmts = self.stmts = set(prelim)
    pass_stmts = set()
    # scan for the two-opcode sequences; on a match, record the offset
    # of the instruction preceding the sequence
    for seq in stmt_opcode_seqs:
        for i in self.op_range(start, end-(len(seq)+1)):
            match = True
            for elem in seq:
                if elem != code[i]:
                    match = False
                    break
                i += self.op_size(code[i])
            if match:
                i = self.prev[i]
                stmts.add(i)
                pass_stmts.add(i)
    if pass_stmts:
        stmt_list = list(stmts)
        stmt_list.sort()
    else:
        stmt_list = prelim
    last_stmt = -1
    self.next_stmt = []
    slist = self.next_stmt = []
    i = 0
    # prune offsets that do not really start a statement, and fill the
    # next-statement table as we go
    for s in stmt_list:
        if code[s] == JA and s not in pass_stmts:
            target = self.get_target(s)
            # forward jumps and jumps on the same source line are not
            # `continue` statements
            if target > s or self.lines[last_stmt].l_no == self.lines[s].l_no:
                stmts.remove(s)
                continue
            j = self.prev[s]
            while code[j] == JA:
                j = self.prev[j]
            if code[j] == LIST_APPEND: #list comprehension
                stmts.remove(s)
                continue
        elif code[s] == POP_TOP and code[self.prev[s]] == ROT_TWO:
            stmts.remove(s)
            continue
        elif code[s] in designator_ops:
            # stores fed directly by FOR_ITER belong to the loop header,
            # not to a standalone statement
            j = self.prev[s]
            while code[j] in designator_ops:
                j = self.prev[j]
            if code[j] == FOR_ITER:
                stmts.remove(s)
                continue
        last_stmt = s
        slist += [s] * (s-i)
        i = s
    slist += [len(code)] * (len(code)-len(slist))
def remove_mid_line_ifs(self, ifs):
    """Filter a list of conditional-jump offsets, dropping those that
    sit mid-line: a test on the same source line as its successor whose
    line ends with another conditional jump."""
    kept = []
    for i in ifs:
        same_line = self.lines[i].l_no == self.lines[i+3].l_no
        if same_line and self.code[self.prev[self.lines[i].next]] in (PJIT, PJIF):
            continue
        kept.append(i)
    return kept
def rem_or(self, start, end, instr, target=None, include_beyond_target=False):
    """
    Find all <instr> in the block from start to end, then discard any
    match that falls strictly inside the span of a JUMP_IF_TRUE (PJIT)
    test, since those belong to an `or` sub-expression rather than the
    construct being analysed.

    <instr> is any python bytecode instruction or a list of opcodes
    If <instr> is an opcode with a target (like a jump), a target
    destination can be specified which must match precisely.
    Return a list with indexes to them or [] if none found.
    """
    code = self.code
    assert(start>=0 and end<=len(code))
    # accept either a single opcode or any container of opcodes
    try: None in instr
    except: instr = [instr]
    result = []
    for i in self.op_range(start, end):
        op = code[i]
        if op in instr:
            if target is None:
                result.append(i)
            else:
                t = self.get_target(i, op)
                if include_beyond_target and t >= target:
                    result.append(i)
                elif t == target:
                    result.append(i)
    # Successively narrow the matches: for each PJIT keep only offsets
    # outside the open interval (pjit, jump_target - 3).
    # (The original code ended with a dead `filtered = []` assignment,
    # removed here.)
    for pjit in self.all_instr(start, end, PJIT):
        tgt = self.get_target(pjit) - 3
        result = [i for i in result if i <= pjit or i >= tgt]
    return result
def next_except_jump(self, start):
    """
    Return the next jump that was generated by an except SomeException:
    construct in a try...except...else clause or None if not found.
    """
    # the usual case: the exception test jumps on the same source line
    except_match = self.first_instr(start, self.lines[start].next, (PJIF))
    if except_match:
        jmp = self.prev[self.get_target(except_match)]
        self.ignore_if.add(except_match)
        return jmp
    # otherwise scan forward for the END_FINALLY that closes this
    # handler, balancing any nested SETUP_* blocks along the way
    count_END_FINALLY = 0
    count_SETUP_ = 0
    for i in self.op_range(start, len(self.code)):
        op = self.code[i]
        if op == END_FINALLY:
            if count_END_FINALLY == count_SETUP_:
                # skip a NOP left behind by the opcode translation
                if self.code[self.prev[i]] == NOP:
                    i = self.prev[i]
                assert self.code[self.prev[i]] in (JA, JF, RETURN_VALUE)
                return self.prev[i]
            count_END_FINALLY += 1
        elif op in (SETUP_EXCEPT, SETUP_FINALLY):
            count_SETUP_ += 1
    #return self.lines[start].next
def restrict_to_parent(self, target, parent):
    """Clamp a jump target to its parent structure: return `target` when
    it lies strictly inside the parent's boundaries, otherwise the
    parent's end offset."""
    inside = parent['start'] < target < parent['end']
    return target if inside else parent['end']
def detect_structure(self, pos, op=None):
    """
    Detect type of block structures and their boundaries to fix optimizied jumps
    in python2.3+

    Appends entries to self.structs and records corrected jump
    destinations in self.fixed_jumps as side effects.
    """
    # TODO: check the struct boundaries more precisely -Dan
    code = self.code
    # Ev remove this test and make op a mandatory argument -Dan
    if op is None:
        op = code[pos]
    ## Detect parent structure: the innermost already-recorded struct
    ## that contains `pos`.
    parent = self.structs[0]
    start = parent['start']
    end = parent['end']
    for s in self.structs:
        _start = s['start']
        _end = s['end']
        if (_start <= pos < _end) and (_start >= start and _end <= end):
            start = _start
            end = _end
            parent = s
    ## We need to know how many new structures were added in this run
    origStructCount = len(self.structs)
    if op == SETUP_LOOP:
        #import pdb; pdb.set_trace()
        start = pos+3
        target = self.get_target(pos, op)
        end = self.restrict_to_parent(target, parent)
        if target != end:
            self.fixed_jumps[pos] = end
        (line_no, next_line_byte) = self.lines[pos]
        # the JUMP_ABSOLUTE closing the loop body, searched inexactly
        jump_back = self.last_instr(start, end, JA,
                                    next_line_byte, False)
        if not jump_back: # loop suite ends in return. wtf right?
            jump_back = self.last_instr(start, end, RETURN_VALUE) + 1
            if not jump_back:
                return
            if code[self.prev[next_line_byte]] not in (PJIF, PJIT):
                loop_type = 'for'
            else:
                loop_type = 'while'
                self.ignore_if.add(self.prev[next_line_byte])
            target = next_line_byte
            end = jump_back + 3
        else:
            if self.get_target(jump_back) >= next_line_byte:
                jump_back = self.last_instr(start, end, JA,
                                            start, False)
            # collapse a trailing jump-to-jump into the loop end
            if end > jump_back+4 and code[end] in (JF, JA):
                if code[jump_back+4] in (JA, JF):
                    if self.get_target(jump_back+4) == self.get_target(end):
                        self.fixed_jumps[pos] = jump_back+4
                        end = jump_back+4
                    elif target < pos:
                        self.fixed_jumps[pos] = jump_back+4
                        end = jump_back+4
            target = self.get_target(jump_back, JA)
            if code[target] in (FOR_ITER, GET_ITER):
                loop_type = 'for'
            else:
                loop_type = 'while'
                test = self.prev[next_line_byte]
                if test == pos:
                    # loop condition is the SETUP_LOOP itself: no test at all
                    loop_type = 'while 1'
                else:
                    self.ignore_if.add(test)
                    test_target = self.get_target(test)
                    if test_target > (jump_back+3):
                        jump_back = test_target
        self.loops.append(target)
        self.structs.append({'type': loop_type + '-loop',
                             'start': target,
                             'end': jump_back})
        if jump_back+3 != end:
            # code between the loop-closing jump and `end` is the else suite
            self.structs.append({'type': loop_type + '-else',
                                 'start': jump_back+3,
                                 'end': end})
    elif op == SETUP_EXCEPT:
        start = pos+3
        target = self.get_target(pos, op)
        end = self.restrict_to_parent(target, parent)
        if target != end:
            self.fixed_jumps[pos] = end
        #print target, end, parent
        ## Add the try block
        self.structs.append({'type': 'try',
                             'start': start,
                             'end': end-4})
        ## Now isolate the except and else blocks
        end_else = start_else = self.get_target(self.prev[end])
        ## Add the except blocks
        i = end
        while self.code[i] != END_FINALLY:
            jmp = self.next_except_jump(i)
            if jmp == None: # check
                i = self.next_stmt[i]
                continue
            if self.code[jmp] == RETURN_VALUE:
                # handler ends in a return: block includes the return itself
                self.structs.append({'type': 'except',
                                     'start': i,
                                     'end': jmp+1})
                i = jmp + 1
            else:
                if self.get_target(jmp) != start_else:
                    end_else = self.get_target(jmp)
                if self.code[jmp] == JF:
                    #self.fixed_jumps[i] = jmp
                    self.fixed_jumps[jmp] = -1
                self.structs.append({'type': 'except',
                                     'start': i,
                                     'end': jmp})
                i = jmp + 3
        ## Add the try-else block
        if end_else != start_else:
            r_end_else = self.restrict_to_parent(end_else, parent)
            self.structs.append({'type': 'try-else',
                                 'start': i+2, # check
                                 'end': r_end_else})
            self.fixed_jumps[i] = r_end_else
        else:
            self.fixed_jumps[i] = i+1
    elif op in (PJIF, PJIT):
        start = pos+3
        target = self.get_target(pos, op)
        rtarget = self.restrict_to_parent(target, parent)
        pre = self.prev
        if target != rtarget and parent['type'] == 'and/or':
            self.fixed_jumps[pos] = rtarget
            return
        #does this jump to right after another cond jump?
        # if so, it's part of a larger conditional
        if (code[pre[target]] in (PJIF, PJIT)) and (target > pos):
            self.fixed_jumps[pos] = pre[target]
            self.structs.append({'type': 'and/or',
                                 'start': start,
                                 'end': pre[target]})
            return
        # is this an if and
        if op == PJIF:
            # other PJIFs to the same target within this statement
            match = self.rem_or(start, self.next_stmt[pos], PJIF, target)
            match = self.remove_mid_line_ifs(match)
            if match:
                if code[pre[rtarget]] in (JF, JA) \
                        and pre[rtarget] not in self.stmts \
                        and self.restrict_to_parent(self.get_target(pre[rtarget]), parent) == rtarget:
                    if code[pre[pre[rtarget]]] == JA \
                            and self.remove_mid_line_ifs([pos]) \
                            and target == self.get_target(pre[pre[rtarget]]) \
                            and (pre[pre[rtarget]] not in self.stmts or self.get_target(pre[pre[rtarget]]) > pre[pre[rtarget]])\
                            and 1 == len(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], (PJIF, PJIT), target))):
                        pass
                    elif code[pre[pre[rtarget]]] == RETURN_VALUE \
                            and self.remove_mid_line_ifs([pos]) \
                            and 1 == (len(set(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], \
                                                         (PJIF, PJIT), target))) \
                                          | set(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], \
                                                       (PJIF, PJIT, JA), pre[rtarget], True))))):
                        pass
                    else:
                        # pick the last PJIF that ends its own line, stopping
                        # at the first one that jumps elsewhere
                        fix = None
                        jump_ifs = self.all_instr(start, self.next_stmt[pos], PJIF)
                        last_jump_good = True
                        for j in jump_ifs:
                            if target == self.get_target(j):
                                if self.lines[j].next == j+3 and last_jump_good:
                                    fix = j
                                    break
                            else:
                                last_jump_good = False
                        self.fixed_jumps[pos] = fix or match[-1]
                        return
                else:
                    self.fixed_jumps[pos] = match[-1]
                    return
        else: # op == PJIT
            next = self.next_stmt[pos]
            if pre[next] == pos:
                pass
            elif code[next] in (JF, JA) and target == self.get_target(next):
                if code[pre[next]] == PJIF:
                    if code[next] == JF or target != rtarget or code[pre[pre[rtarget]]] not in (JA, RETURN_VALUE):
                        self.fixed_jumps[pos] = pre[next]
                        return
            elif code[next] == JA and code[target] in (JA, JF) \
                    and self.get_target(target) == self.get_target(next):
                self.fixed_jumps[pos] = pre[next]
                return
        #don't add a struct for a while test, it's already taken care of
        if pos in self.ignore_if:
            return
        if code[pre[rtarget]] == JA and pre[rtarget] in self.stmts \
                and pre[rtarget] != pos and pre[pre[rtarget]] != pos \
                and not (code[rtarget] == JA and code[rtarget+3] == POP_BLOCK and code[pre[pre[rtarget]]] != JA):
            rtarget = pre[rtarget]
        #does the if jump just beyond a jump op, then this is probably an if statement
        if code[pre[rtarget]] in (JA, JF):
            if_end = self.get_target(pre[rtarget])
            #is this a loop not an if?
            if (if_end < pre[rtarget]) and (code[pre[if_end]] == SETUP_LOOP):
                if(if_end > start):
                    return
            end = self.restrict_to_parent(if_end, parent)
            self.structs.append({'type': 'if-then',
                                 'start': start,
                                 'end': pre[rtarget]})
            self.not_continue.add(pre[rtarget])
            if rtarget < end:
                self.structs.append({'type': 'if-else',
                                     'start': rtarget,
                                     'end': end})
        elif code[pre[rtarget]] == RETURN_VALUE:
            self.structs.append({'type': 'if-then',
                                 'start': start,
                                 'end': rtarget})
            self.return_end_ifs.add(pre[rtarget])
    # if it's an old JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP
    #if target > pos:
    #    unop_target = self.last_instr(pos, target, JF, target)
    #    if unop_target and code[unop_target+3] != ROT_TWO:
    #        self.fixed_jumps[pos] = unop_target
    #    else:
    #        self.fixed_jumps[pos] = self.restrict_to_parent(target, parent)
def find_jump_targets(self, code):
    """
    Detect all offsets in a byte code which are jump targets.

    Return the list of offsets.

    This procedure is modelled after dis.findlables(), but here
    for each target the number of jumps are counted.

    Side effects: (re)initializes self.structs, self.loops,
    self.fixed_jumps, self.ignore_if, self.not_continue and
    self.return_end_ifs, and runs detect_structure() over every
    instruction.  Returns a dict mapping target offset -> list of
    offsets that jump there.
    """
    hasjrel = dis.hasjrel
    hasjabs = dis.hasjabs
    n = len(code)
    self.structs = [{'type': 'root',
                     'start': 0,
                     'end': n-1}]
    self.loops = [] ## All loop entry points
    self.fixed_jumps = {} ## Map fixed jumps to their real destination
    self.ignore_if = set()
    self.build_stmt_indices()
    self.not_continue = set()
    self.return_end_ifs = set()
    targets = {}
    for i in self.op_range(0, n):
        op = code[i]
        ## Determine structures and fix jumps for 2.3+
        self.detect_structure(i, op)
        if op >= HAVE_ARGUMENT:
            # a fixed jump recorded by detect_structure() overrides the
            # destination encoded in the bytecode
            label = self.fixed_jumps.get(i)
            oparg = self.get_argument(i)
            if label is None:
                if op in hasjrel and op != FOR_ITER:
                    label = i + 3 + oparg
                #elif op in hasjabs:  # absolute jumps: not handled here
                    #if op in (PJIF, PJIT):  # OR-pop handling still to do
                        #if (oparg > i):
                            #label = oparg
            if label is not None and label != -1:
                targets[label] = targets.get(label, []) + [i]
        elif op == END_FINALLY and i in self.fixed_jumps:
            label = self.fixed_jumps[i]
            targets[label] = targets.get(label, []) + [i]
    return targets

View File

@@ -0,0 +1,937 @@
# Copyright (c) 1999 John Aycock
# Copyright (c) 2000-2002 by hartmut Goebel <hartmut@goebel.noris.de>
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
#
# See main module for license.
#
__all__ = ['Token', 'Scanner', 'getscanner']
import types
import disas as dis
from collections import namedtuple
from array import array
from operator import itemgetter
from struct import *
from Scanner import Token, Code
class Scanner:
def __init__(self, version):
    """Build a scanner for byte-code of interpreter *version*.

    NOTE(review): this constructor injects opcode constants
    (HAVE_ARGUMENT, every opname, and the PJIF/PJIT/JA/JF aliases)
    into the module's global namespace -- every other method of this
    class depends on that side effect having run first.
    """
    self.version = version
    self.resetTokenClass()
    # configure the project-local disassembler for this bytecode version
    dis.setVersion(version)
    globals().update({'HAVE_ARGUMENT': dis.HAVE_ARGUMENT})
    # expose every opcode name as a module-level constant ('+' -> '_')
    globals().update({k.replace('+','_'):v for (k,v) in dis.opmap.items()})
    # short aliases for the most frequently used jump opcodes
    globals().update({'PJIF': dis.opmap['JUMP_IF_FALSE']})
    globals().update({'PJIT': dis.opmap['JUMP_IF_TRUE']})
    globals().update({'JA': dis.opmap['JUMP_ABSOLUTE']})
    globals().update({'JF': dis.opmap['JUMP_FORWARD']})
    self.JUMP_OPs = map(lambda op: dis.opname[op],
                        dis.hasjrel + dis.hasjabs)
def setShowAsm(self, showasm, out=None):
    """Enable/disable disassembly tracing and select its output stream.

    When *showasm* is true, disassemble() prints each emitted token
    to *out* (None means the default stream of ``print >>``).
    """
    self.out = out
    self.showasm = showasm
def setTokenClass(self, tokenClass):
    """Install *tokenClass* as the factory used for emitted tokens.

    Must be an old-style class (Python 2 ``types.ClassType``).
    """
    assert type(tokenClass) == types.ClassType
    self.Token = tokenClass
def resetTokenClass(self):
    """Restore the default Token class imported from the Scanner module."""
    self.setTokenClass(Token)
def disassemble(self, co, classname=None):
    """
    Disassemble a code object, returning a list of 'Token'.

    The main part of this procedure is modelled after
    dis.disassemble().

    Returns (tokens, customize) where customize maps specialized
    opcode names (e.g. 'CALL_FUNCTION_2') to their oparg.
    """
    rv = []
    customize = {}
    Token = self.Token # shortcut
    self.code = array('B', co.co_code)
    n = len(self.code)
    # linestarts holds (offset, line-number) pairs from dis.findlinestarts
    self.linestarts = list(dis.findlinestarts(co))
    self.prev = [0]
    pop_delet = 0
    i=0
    # rewrite relative JUMP_IF_* into absolute form before any deletion
    self.restructRelativeJump()
    # class and names: undo private-name mangling when inside a class
    if classname:
        classname = '_' + classname.lstrip('_') + '__'
        def unmangle(name):
            if name.startswith(classname) and name[-2:] != '__':
                return name[len(classname) - 2:]
            return name
        free = [ unmangle(name) for name in (co.co_cellvars + co.co_freevars) ]
        names = [ unmangle(name) for name in co.co_names ]
        varnames = [ unmangle(name) for name in co.co_varnames ]
    else:
        free = co.co_cellvars + co.co_freevars
        names = co.co_names
        varnames = co.co_varnames
    self.names = names
    # collect offsets of instructions to remove into "toDel"
    toDel = []
    while i < n-pop_delet:
        op = self.code[i]
        ret = self.getOpcodeToDel(i)
        if ret != None:
            toDel += ret
        if op >= dis.HAVE_ARGUMENT:
            i += 2
        i += 1
    # ugly, to revisit/rethink (do it all in one pass? tricky)
    if toDel:
        toDel = sorted(list(set(toDel)))
        delta = 0
        # delete each flagged instruction byte-by-byte, re-patching
        # linestarts and jump offsets after every removed byte
        for x in toDel:
            if self.code[x-delta] >= dis.HAVE_ARGUMENT:
                self.code.pop(x-delta)
                self.restructCode(x-delta)
                self.code.pop(x-delta)
                self.restructCode(x-delta)
                self.code.pop(x-delta)
                self.restructCode(x-delta)
                delta += 3
            else:
                self.code.pop(x-delta)
                self.restructCode(x-delta)
                delta += 1
    # map every byte offset to the offset of the previous instruction
    n = len(self.code)
    for i in self.op_range(0, n):
        op = self.code[i]
        self.prev.append(i)
        if op >= HAVE_ARGUMENT:
            self.prev.append(i)
            self.prev.append(i)
    j = 0
    linestarts = self.linestarts
    self.lines = []
    linetuple = namedtuple('linetuple', ['l_no', 'next'])
    linestartoffsets = {a for (a, _) in linestarts}
    (prev_start_byte, prev_line_no) = linestarts[0]
    # self.lines[offset] = (source line number, offset of next line start)
    for (start_byte, line_no) in linestarts[1:]:
        while j < start_byte:
            self.lines.append(linetuple(prev_line_no, start_byte))
            j += 1
        last_op = self.code[self.prev[start_byte]]
        (prev_start_byte, prev_line_no) = (start_byte, line_no)
    while j < n:
        self.lines.append(linetuple(prev_line_no, n))
        j+=1
    # cf maps jump-target offset -> [offsets jumping there]
    cf = self.find_jump_targets(self.code)
    # rename PRINT_ITEM/PRINT_NEWLINE that continue a print statement
    last_stmt = self.next_stmt[0]
    i = self.next_stmt[last_stmt]
    replace = {}
    while i < n-1:
        if self.lines[last_stmt].next > i:
            if self.code[last_stmt] == PRINT_ITEM:
                if self.code[i] == PRINT_ITEM:
                    replace[i] = 'PRINT_ITEM_CONT'
                elif self.code[i] == PRINT_NEWLINE:
                    replace[i] = 'PRINT_NEWLINE_CONT'
        last_stmt = i
        i = self.next_stmt[i]
    # rename IMPORT_NAMEs that continue an import on the same line
    imports = self.all_instr(0, n, (IMPORT_NAME, IMPORT_FROM, IMPORT_STAR))
    if len(imports) > 1:
        last_import = imports[0]
        for i in imports[1:]:
            if self.lines[last_import].next > i:
                if self.code[last_import] == IMPORT_NAME == self.code[i]:
                    replace[i] = 'IMPORT_NAME_CONT'
            last_import = i
    # main token-emission loop
    extended_arg = 0
    for offset in self.op_range(0, n):
        if offset in cf:
            k = 0
            for j in cf[offset]:
                rv.append(Token('COME_FROM', None, repr(j),
                                offset="%s_%d" % (offset, k) ))
                k += 1
        op = self.code[offset]
        opname = dis.opname[op]
        oparg = None; pattr = None
        if op >= HAVE_ARGUMENT:
            oparg = self.get_argument(offset) + extended_arg
            extended_arg = 0
            if op == dis.EXTENDED_ARG:
                extended_arg = oparg * 65536L
                continue
            if op in dis.hasconst:
                const = co.co_consts[oparg]
                if type(const) == types.CodeType:
                    oparg = const
                    if const.co_name == '<lambda>':
                        assert opname == 'LOAD_CONST'
                        opname = 'LOAD_LAMBDA'
                    elif const.co_name == '<genexpr>':
                        opname = 'LOAD_GENEXPR'
                    elif const.co_name == '<dictcomp>':
                        opname = 'LOAD_DICTCOMP'
                    elif const.co_name == '<setcomp>':
                        opname = 'LOAD_SETCOMP'
                    # verify uses 'pattr' for comparism, since 'attr'
                    # now holds Code(const) and thus can not be used
                    # for comparism (todo: think about changing this)
                    #pattr = 'code_object @ 0x%x %s->%s' %\
                    # (id(const), const.co_filename, const.co_name)
                    pattr = '<code_object ' + const.co_name + '>'
                else:
                    pattr = const
            elif op in dis.hasname:
                pattr = names[oparg]
            elif op in dis.hasjrel:
                pattr = repr(offset + 3 + oparg)
            elif op in dis.hasjabs:
                pattr = repr(oparg)
            elif op in dis.haslocal:
                pattr = varnames[oparg]
            elif op in dis.hascompare:
                pattr = dis.cmp_op[oparg]
            elif op in dis.hasfree:
                pattr = free[oparg]
        if op in (BUILD_LIST, BUILD_TUPLE, BUILD_SLICE,
                  UNPACK_SEQUENCE,
                  MAKE_FUNCTION, CALL_FUNCTION, MAKE_CLOSURE,
                  CALL_FUNCTION_VAR, CALL_FUNCTION_KW,
                  CALL_FUNCTION_VAR_KW, DUP_TOPX,
                  ):
            # CE - Hack for >= 2.5
            # Now all values loaded via LOAD_CLOSURE are packed into
            # a tuple before calling MAKE_CLOSURE.
            if op == BUILD_TUPLE and \
                    self.code[offset-3] == LOAD_CLOSURE:
                continue
            else:
                opname = '%s_%d' % (opname, oparg)
                if op != BUILD_SLICE:
                    customize[opname] = oparg
        elif op == JA:
            target = self.get_target(offset)
            if target < offset:
                # backward jump: either a `continue` or the loop-closing jump
                if offset in self.stmts and self.code[offset+3] not in (END_FINALLY, POP_BLOCK) \
                        and offset not in self.not_continue:
                    opname = 'CONTINUE'
                else:
                    opname = 'JUMP_BACK'
        elif op == LOAD_GLOBAL:
            try:
                if pattr == 'AssertionError' and rv and rv[-1] == 'JUMP_IF_TRUE':
                    opname = 'LOAD_ASSERT'
            except AttributeError:
                pass
        elif op == RETURN_VALUE:
            if offset in self.return_end_ifs:
                opname = 'RETURN_END_IF'
        if offset not in replace:
            rv.append(Token(opname, oparg, pattr, offset, linestart = offset in linestartoffsets))
        else:
            rv.append(Token(replace[offset], oparg, pattr, offset, linestart = offset in linestartoffsets))
    if self.showasm:
        out = self.out # shortcut
        for t in rv:
            print >>out, t
        print >>out
    return rv, customize
def getOpcodeToDel(self, i):
    """
    check validity of the opcode at position I and return a list of opcode to delete

    Returns a list of byte offsets to delete (part of the 2.5/2.6 ->
    pseudo-2.7 bytecode rewrite), or None when nothing is to be removed:
    - a POP_TOP following a conditional/unconditional jump
    - the DUP_TOP/STORE/LOAD/DELETE scaffolding around a BUILD_LIST that
      implements a list comprehension

    Raises:
        NotImplementedError: for EXTENDED_ARG or a retargeted jump whose
            destination does not fit in 16 bits (not handled yet).
    """
    opcode = self.code[i]
    opsize = self.op_size(opcode)
    if opcode == EXTENDED_ARG:
        # was `raise 'A faire'` ("to do"): raising a string has been
        # illegal since Python 2.6 and only produced a TypeError
        raise NotImplementedError('A faire: EXTENDED_ARG not handled')
    if opcode in (PJIF,PJIT,JA,JF):
        # a POP_TOP right after a jump is an artifact of the old
        # JUMP_IF_* opcodes and must go
        if self.code[i+opsize] == POP_TOP:
            if self.code[i+opsize] == self.code[i+opsize+1] and self.code[i+opsize] == self.code[i+opsize+2] \
                    and opcode in (JF,JA) and self.code[i+opsize] != self.code[i+opsize+3]:
                pass
            else:
                return [i+opsize]
    if opcode == BUILD_LIST:
        if self.code[i+opsize] == DUP_TOP and self.code[i+opsize+1] in (STORE_NAME,STORE_FAST):
            # del DUP/STORE_NAME x
            toDel = [i+opsize, i+opsize+1]
            nameDel = self.get_argument(i+opsize+1)
            start = i+opsize+1
            end = start
            # del LOAD_NAME x
            while end < len(self.code):
                end = self.first_instr(end, len(self.code), (LOAD_NAME,LOAD_FAST))
                if nameDel == self.get_argument(end):
                    toDel += [end]
                    break
                if self.code[end] == LOAD_NAME:
                    end += self.op_size(LOAD_NAME)
                else:
                    end += self.op_size(LOAD_FAST)
            # log JA/POP_TOP to del and update PJIF
            while start < end:
                start = self.first_instr(start, len(self.code), (PJIF))
                if start == None:
                    break
                target = self.get_target(start)
                if self.code[target] == POP_TOP and self.code[target-3] == JA:
                    toDel += [target, target-3]
                    # update PJIF to jump straight past the removed pair
                    target = self.get_target(target-3)
                    if target > 0xFFFF:
                        # was `raise 'A gerer'` ("to handle"): see above
                        raise NotImplementedError('A gerer: target > 0xFFFF')
                    self.code[start+1] = target & 0xFF
                    self.code[start+2] = (target >> 8) & 0xFF
                start += self.op_size(PJIF)
            # del DELETE_NAME x
            while end < len(self.code):
                end = self.first_instr(end, len(self.code), (DELETE_NAME,DELETE_FAST))
                if nameDel == self.get_argument(end):
                    toDel += [end]
                    break
                if self.code[end] == DELETE_NAME:
                    end += self.op_size(DELETE_NAME)
                else:
                    end += self.op_size(DELETE_FAST)
            return toDel
    return None
def restructRelativeJump(self):
    """
    change relative JUMP_IF_FALSE/TRUE to absolut jump

    Pass 1 converts each relative JUMP_IF_* operand into an absolute
    destination in place.  Pass 2 short-circuits conditional jumps that
    land on a JUMP_ABSOLUTE, retargeting them to its final destination.

    Raises:
        NotImplementedError: if a destination exceeds 16 bits (rewriting
            with EXTENDED_ARG is not implemented).
    """
    for i in self.op_range(0, len(self.code)):
        if(self.code[i] in (PJIF,PJIT)):
            target = self.get_argument(i)
            target += i + 3
            if target > 0xFFFF:
                # was `raise 'A gerer'` ("to handle"): string exceptions
                # are illegal since Python 2.6 and only raised TypeError
                raise NotImplementedError('A gerer: target > 0xFFFF')
            # store the absolute target, little-endian
            self.code[i+1] = target & 0xFF
            self.code[i+2] = (target >> 8) & 0xFF
    for i in self.op_range(0, len(self.code)):
        if(self.code[i] in (PJIF,PJIT)):
            target = self.get_target(i)
            if self.code[target] == JA:
                # jump-to-jump: follow the JUMP_ABSOLUTE once
                target = self.get_target(target)
                if target > 0xFFFF:
                    raise NotImplementedError('A gerer: target > 0xFFFF')
                self.code[i+1] = target & 0xFF
                self.code[i+2] = (target >> 8) & 0xFF
def restructCode(self, i):
    """
    restruct linestarts and jump destination after removing a POP_TOP

    After one byte was deleted at offset *i*: shift every line-start
    offset past *i* down by one and patch every jump whose encoded
    destination crossed *i*.
    """
    result = list()
    for item in self.linestarts:
        if i < item[0]:
            result.append((item[0]-1, item[1]))
        else:
            result.append((item[0], item[1]))
    self.linestarts = result
    for x in self.op_range(0, len(self.code)):
        op = self.code[x]
        if op >= HAVE_ARGUMENT:
            if op in dis.hasjrel:
                # relative jump shrinks only if it spans the deleted byte
                if x < i and self.get_target(x) > i:
                    # decrement the 16-bit little-endian operand,
                    # borrowing from the high byte on underflow
                    if self.code[x+1]-1 < 0:
                        self.code[x+2] -= 1
                        self.code[x+1] = self.code[x+1]+255
                    else:
                        self.code[x+1] -= 1
            elif op in dis.hasjabs:
                # absolute destination shifts if it lies past the deletion
                if i < self.get_target(x):
                    if self.code[x+1]-1 < 0:
                        self.code[x+2] -= 1
                        self.code[x+1] = self.code[x+1]+255
                    else:
                        self.code[x+1] -= 1
def get_target(self, pos, op=None):
    """Decode the jump destination of the instruction at *pos*.

    Relative jumps are resolved against the offset of the following
    instruction (pos + 3); absolute jumps return the raw operand.
    *op* may be supplied to skip re-reading the opcode byte.
    """
    op = self.code[pos] if op is None else op
    dest = self.get_argument(pos)
    # relative jumps encode an offset from the next instruction
    return dest + pos + 3 if op in dis.hasjrel else dest
def get_argument(self, pos):
    """Return the 16-bit little-endian operand stored at pos+1, pos+2."""
    lo = self.code[pos+1]
    hi = self.code[pos+2]
    return lo + hi * 256
def first_instr(self, start, end, instr, target=None, exact=True):
    """
    Find the first <instr> in the block from start to end.

    <instr> is any python bytecode instruction or a list of opcodes
    If <instr> is an opcode with a target (like a jump), a target
    destination can be specified which must match precisely if exact
    is True, or if exact is False, the instruction which has a target
    closest to <target> will be returned.

    Return index to it or None if not found.
    """
    code = self.code
    assert(start >= 0 and end <= len(code))
    # accept a single opcode as well as any container of opcodes
    try:
        None in instr
    except:
        instr = [instr]
    best = None
    best_dist = len(code)
    for off in self.op_range(start, end):
        op = code[off]
        if op not in instr:
            continue
        if target is None:
            return off
        dest = self.get_target(off, op)
        if dest == target:
            return off
        if not exact:
            gap = abs(target - dest)
            if gap < best_dist:
                best_dist = gap
                best = off
    return best
def last_instr(self, start, end, instr, target=None, exact=True):
    """
    Find the last <instr> in the block from start to end.

    <instr> is any python bytecode instruction or a list of opcodes
    If <instr> is an opcode with a target (like a jump), a target
    destination can be specified which must match precisely if exact
    is True, or if exact is False, the instruction which has a target
    closest to <target> will be returned.

    Return index to it or None if not found.

    Unlike first_instr(), an out-of-range (start, end) silently
    returns None instead of asserting, and the `<=` distance test
    makes ties resolve to the *later* instruction.
    """
    code = self.code
    if not (start>=0 and end<=len(code)):
        return None
    try: None in instr
    except: instr = [instr]
    pos = None
    distance = len(code)
    for i in self.op_range(start, end):
        op = code[i]
        if op in instr:
            if target is None:
                pos = i
            else:
                dest = self.get_target(i, op)
                if dest == target:
                    # exact hit: lock distance so later inexact
                    # candidates can no longer displace it except
                    # by another exact hit
                    distance = 0
                    pos = i
                elif not exact:
                    _distance = abs(target - dest)
                    if _distance <= distance:
                        distance = _distance
                        pos = i
    return pos
def all_instr(self, start, end, instr, target=None, include_beyond_target=False):
    """
    Find all <instr> in the block from start to end.

    <instr> is any python bytecode instruction or a list of opcodes
    If <instr> is an opcode with a target (like a jump), a target
    destination can be specified which must match precisely.

    Return a list with indexes to them or [] if none found.
    """
    code = self.code
    assert(start >= 0 and end <= len(code))
    # accept a single opcode as well as any container of opcodes
    try:
        None in instr
    except:
        instr = [instr]
    found = []
    for off in self.op_range(start, end):
        op = code[off]
        if op not in instr:
            continue
        if target is None:
            found.append(off)
            continue
        dest = self.get_target(off, op)
        if (include_beyond_target and dest >= target) or dest == target:
            found.append(off)
    return found
def op_size(self, op):
    """Width in bytes of an instruction with opcode *op*: 1 without
    an argument, 3 with one."""
    return 1 if op < HAVE_ARGUMENT else 3
def op_range(self, start, end):
    """Yield the byte offset of each instruction in [start, end),
    stepping by each instruction's encoded size."""
    offset = start
    while offset < end:
        yield offset
        offset += self.op_size(self.code[offset])
def build_stmt_indices(self):
    """Compute the set of offsets that start a statement.

    Populates self.stmts (set of statement-start offsets) and
    self.next_stmt (a list mapping every byte offset to the offset of
    the next statement start).
    """
    code = self.code
    start = 0;
    end = len(code)
    # opcodes that can only appear as (or at the end of) a statement
    stmt_opcodes = {
        SETUP_LOOP, BREAK_LOOP, CONTINUE_LOOP,
        SETUP_FINALLY, END_FINALLY, SETUP_EXCEPT,
        POP_BLOCK, STORE_FAST, DELETE_FAST, STORE_DEREF,
        STORE_GLOBAL, DELETE_GLOBAL, STORE_NAME, DELETE_NAME,
        STORE_ATTR, DELETE_ATTR, STORE_SUBSCR, DELETE_SUBSCR,
        RETURN_VALUE, RAISE_VARARGS, POP_TOP,
        PRINT_EXPR, PRINT_ITEM, PRINT_NEWLINE, PRINT_ITEM_TO, PRINT_NEWLINE_TO,
        JUMP_ABSOLUTE, EXEC_STMT,
    }
    # two-opcode sequences produced by a bare conditional (`if x: pass`)
    stmt_opcode_seqs = [(PJIF, JF), (PJIF, JA), (PJIT, JF), (PJIT, JA)]
    # opcodes that store into a target (assignment designators)
    designator_ops = {
        STORE_FAST, STORE_NAME, STORE_GLOBAL, STORE_DEREF, STORE_ATTR,
        STORE_SLICE_0, STORE_SLICE_1, STORE_SLICE_2, STORE_SLICE_3,
        STORE_SUBSCR, UNPACK_SEQUENCE, JA
    }
    prelim = self.all_instr(start, end, stmt_opcodes)
    stmts = self.stmts = set(prelim)
    pass_stmts = set()
    for seq in stmt_opcode_seqs:
        for i in self.op_range(start, end-(len(seq)+1)):
            match = True
            for elem in seq:
                if elem != code[i]:
                    match = False
                    break
                i += self.op_size(code[i])
            if match:
                i = self.prev[i]
                stmts.add(i)
                pass_stmts.add(i)
    if pass_stmts:
        stmt_list = list(stmts)
        stmt_list.sort()
    else:
        stmt_list = prelim
    last_stmt = -1
    self.next_stmt = []
    slist = self.next_stmt = []
    i = 0
    # weed out offsets that cannot really start a statement
    for s in stmt_list:
        if code[s] == JA and s not in pass_stmts:
            target = self.get_target(s)
            if target > s or self.lines[last_stmt].l_no == self.lines[s].l_no:
                stmts.remove(s)
                continue
            j = self.prev[s]
            while code[j] == JA:
                j = self.prev[j]
            if code[j] == LIST_APPEND: #list comprehension
                stmts.remove(s)
                continue
        elif code[s] == POP_TOP and code[self.prev[s]] == ROT_TWO:
            stmts.remove(s)
            continue
        elif code[s] in designator_ops:
            # a chain of designators fed by FOR_ITER is the loop header,
            # not a standalone statement
            j = self.prev[s]
            while code[j] in designator_ops:
                j = self.prev[j]
            if code[j] == FOR_ITER:
                stmts.remove(s)
                continue
        last_stmt = s
        slist += [s] * (s-i)
        i = s
    slist += [len(code)] * (len(code)-len(slist))
def remove_mid_line_ifs(self, ifs):
    """Filter a list of conditional-jump offsets, dropping those that sit
    mid-line when the line already ends in another conditional jump.

    An offset is dropped when the instruction after it is still on the
    same source line AND the last instruction of that line is itself a
    JUMP_IF_TRUE/JUMP_IF_FALSE.
    """
    kept = []
    for off in ifs:
        same_line = self.lines[off].l_no == self.lines[off+3].l_no
        line_ender = self.code[self.prev[self.lines[off].next]]
        if same_line and line_ender in (PJIT, PJIF):
            continue
        kept.append(off)
    return kept
def rem_or(self, start, end, instr, target=None, include_beyond_target=False):
    """
    Find all <instr> in the block from start to end.

    <instr> is any python bytecode instruction or a list of opcodes
    If <instr> is an opcode with a target (like a jump), a target
    destination can be specified which must match precisely.

    Return a list with indexes to them or [] if none found.

    Unlike all_instr(), matches lying strictly inside the span of any
    JUMP_IF_TRUE (between the jump and just before its target) are
    removed afterwards -- they belong to an `or` sub-expression.
    """
    code = self.code
    assert(start>=0 and end<=len(code))
    try: None in instr
    except: instr = [instr]
    result = []
    for i in self.op_range(start, end):
        op = code[i]
        if op in instr:
            if target is None:
                result.append(i)
            else:
                t = self.get_target(i, op)
                if include_beyond_target and t >= target:
                    result.append(i)
                elif t == target:
                    result.append(i)
    # second pass: for each JUMP_IF_TRUE, keep only matches outside
    # its (pjit, target-3) span; `result` is re-filtered per pjit
    pjits = self.all_instr(start, end, PJIT)
    filtered = []
    for pjit in pjits:
        tgt = self.get_target(pjit)-3
        for i in result:
            if i <= pjit or i >= tgt:
                filtered.append(i)
        result = filtered
        filtered = []
    return result
def next_except_jump(self, start):
    """
    Return the next jump that was generated by an except SomeException:
    construct in a try...except...else clause or None if not found.

    Fast path: a JUMP_IF_FALSE on `start`'s own source line is the
    exception-match test; the predecessor of its target is the jump out
    of the handler.  Otherwise scan forward to the END_FINALLY closing
    this handler, balancing nested SETUP_EXCEPT/SETUP_FINALLY blocks.
    """
    except_match = self.first_instr(start, self.lines[start].next, (PJIF))
    if except_match:
        jmp = self.prev[self.get_target(except_match)]
        # remember this test so detect_structure() won't treat it as an `if`
        self.ignore_if.add(except_match)
        return jmp
    count_END_FINALLY = 0
    count_SETUP_ = 0
    for i in self.op_range(start, len(self.code)):
        op = self.code[i]
        if op == END_FINALLY:
            if count_END_FINALLY == count_SETUP_:
                if self.code[self.prev[i]] == NOP:
                    # NOTE(review): presumably a NOP left by the POP_TOP
                    # removal pass -- confirm
                    i = self.prev[i]
                assert self.code[self.prev[i]] in (JA, JF, RETURN_VALUE)
                return self.prev[i]
            count_END_FINALLY += 1
        elif op in (SETUP_EXCEPT, SETUP_FINALLY):
            count_SETUP_ += 1
    #return self.lines[start].next
def restrict_to_parent(self, target, parent):
    """Clamp a jump target to its parent structure's boundaries.

    Returns *target* when it lies strictly inside (start, end) of
    *parent*; otherwise returns the parent's end offset.
    """
    inside = parent['start'] < target < parent['end']
    if not inside:
        return parent['end']
    return target
def detect_structure(self, pos, op=None):
    """
    Detect type of block structures and their boundaries to fix optimizied jumps
    in python2.3+

    Appends entries to self.structs and records corrected jump
    destinations in self.fixed_jumps as side effects.
    """
    # TODO: check the struct boundaries more precisely -Dan
    code = self.code
    # Ev remove this test and make op a mandatory argument -Dan
    if op is None:
        op = code[pos]
    ## Detect parent structure: the innermost already-recorded struct
    ## that contains `pos`.
    parent = self.structs[0]
    start = parent['start']
    end = parent['end']
    for s in self.structs:
        _start = s['start']
        _end = s['end']
        if (_start <= pos < _end) and (_start >= start and _end <= end):
            start = _start
            end = _end
            parent = s
    ## We need to know how many new structures were added in this run
    origStructCount = len(self.structs)
    if op == SETUP_LOOP:
        #import pdb; pdb.set_trace()
        start = pos+3
        target = self.get_target(pos, op)
        end = self.restrict_to_parent(target, parent)
        if target != end:
            self.fixed_jumps[pos] = end
        (line_no, next_line_byte) = self.lines[pos]
        # the JUMP_ABSOLUTE closing the loop body, searched inexactly
        jump_back = self.last_instr(start, end, JA,
                                    next_line_byte, False)
        if not jump_back: # loop suite ends in return. wtf right?
            jump_back = self.last_instr(start, end, RETURN_VALUE) + 1
            if not jump_back:
                return
            if code[self.prev[next_line_byte]] not in (PJIF, PJIT):
                loop_type = 'for'
            else:
                loop_type = 'while'
                self.ignore_if.add(self.prev[next_line_byte])
            target = next_line_byte
            end = jump_back + 3
        else:
            if self.get_target(jump_back) >= next_line_byte:
                jump_back = self.last_instr(start, end, JA,
                                            start, False)
            # collapse a trailing jump-to-jump into the loop end
            if end > jump_back+4 and code[end] in (JF, JA):
                if code[jump_back+4] in (JA, JF):
                    if self.get_target(jump_back+4) == self.get_target(end):
                        self.fixed_jumps[pos] = jump_back+4
                        end = jump_back+4
                    elif target < pos:
                        self.fixed_jumps[pos] = jump_back+4
                        end = jump_back+4
            target = self.get_target(jump_back, JA)
            if code[target] in (FOR_ITER, GET_ITER):
                loop_type = 'for'
            else:
                loop_type = 'while'
                test = self.prev[next_line_byte]
                if test == pos:
                    # loop condition is the SETUP_LOOP itself: no test
                    loop_type = 'while 1'
                else:
                    self.ignore_if.add(test)
                    test_target = self.get_target(test)
                    if test_target > (jump_back+3):
                        jump_back = test_target
        self.loops.append(target)
        self.structs.append({'type': loop_type + '-loop',
                             'start': target,
                             'end': jump_back})
        if jump_back+3 != end:
            # code between the loop-closing jump and `end` is the else suite
            self.structs.append({'type': loop_type + '-else',
                                 'start': jump_back+3,
                                 'end': end})
    elif op == SETUP_EXCEPT:
        start = pos+3
        target = self.get_target(pos, op)
        end = self.restrict_to_parent(target, parent)
        if target != end:
            self.fixed_jumps[pos] = end
        #print target, end, parent
        ## Add the try block
        self.structs.append({'type': 'try',
                             'start': start,
                             'end': end-4})
        ## Now isolate the except and else blocks
        end_else = start_else = self.get_target(self.prev[end])
        ## Add the except blocks
        i = end
        while self.code[i] != END_FINALLY:
            jmp = self.next_except_jump(i)
            if jmp == None: # check
                i = self.next_stmt[i]
                continue
            if self.code[jmp] == RETURN_VALUE:
                # handler ends in a return: block includes the return
                self.structs.append({'type': 'except',
                                     'start': i,
                                     'end': jmp+1})
                i = jmp + 1
            else:
                if self.get_target(jmp) != start_else:
                    end_else = self.get_target(jmp)
                if self.code[jmp] == JF:
                    #self.fixed_jumps[i] = jmp
                    self.fixed_jumps[jmp] = -1
                self.structs.append({'type': 'except',
                                     'start': i,
                                     'end': jmp})
                i = jmp + 3
        ## Add the try-else block
        if end_else != start_else:
            r_end_else = self.restrict_to_parent(end_else, parent)
            self.structs.append({'type': 'try-else',
                                 'start': i+2, # check
                                 'end': r_end_else})
            self.fixed_jumps[i] = r_end_else
        else:
            self.fixed_jumps[i] = i+1
    elif op in (PJIF, PJIT):
        start = pos+3
        target = self.get_target(pos, op)
        rtarget = self.restrict_to_parent(target, parent)
        pre = self.prev
        if target != rtarget and parent['type'] == 'and/or':
            self.fixed_jumps[pos] = rtarget
            return
        #does this jump to right after another cond jump?
        # if so, it's part of a larger conditional
        if (code[pre[target]] in (PJIF, PJIT)) and (target > pos):
            self.fixed_jumps[pos] = pre[target]
            self.structs.append({'type': 'and/or',
                                 'start': start,
                                 'end': pre[target]})
            return
        # is this an if and
        if op == PJIF:
            # other PJIFs to the same target within this statement
            match = self.rem_or(start, self.next_stmt[pos], PJIF, target)
            match = self.remove_mid_line_ifs(match)
            if match:
                if code[pre[rtarget]] in (JF, JA) \
                        and pre[rtarget] not in self.stmts \
                        and self.restrict_to_parent(self.get_target(pre[rtarget]), parent) == rtarget:
                    if code[pre[pre[rtarget]]] == JA \
                            and self.remove_mid_line_ifs([pos]) \
                            and target == self.get_target(pre[pre[rtarget]]) \
                            and (pre[pre[rtarget]] not in self.stmts or self.get_target(pre[pre[rtarget]]) > pre[pre[rtarget]])\
                            and 1 == len(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], (PJIF, PJIT), target))):
                        pass
                    elif code[pre[pre[rtarget]]] == RETURN_VALUE \
                            and self.remove_mid_line_ifs([pos]) \
                            and 1 == (len(set(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], \
                                                         (PJIF, PJIT), target))) \
                                          | set(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], \
                                                       (PJIF, PJIT, JA), pre[rtarget], True))))):
                        pass
                    else:
                        # pick the last PJIF that ends its own line,
                        # stopping at the first one that jumps elsewhere
                        fix = None
                        jump_ifs = self.all_instr(start, self.next_stmt[pos], PJIF)
                        last_jump_good = True
                        for j in jump_ifs:
                            if target == self.get_target(j):
                                if self.lines[j].next == j+3 and last_jump_good:
                                    fix = j
                                    break
                            else:
                                last_jump_good = False
                        self.fixed_jumps[pos] = fix or match[-1]
                        return
                else:
                    self.fixed_jumps[pos] = match[-1]
                    return
        else: # op == PJIT
            next = self.next_stmt[pos]
            if pre[next] == pos:
                pass
            elif code[next] in (JF, JA) and target == self.get_target(next):
                if code[pre[next]] == PJIF:
                    if code[next] == JF or target != rtarget or code[pre[pre[rtarget]]] not in (JA, RETURN_VALUE):
                        self.fixed_jumps[pos] = pre[next]
                        return
            elif code[next] == JA and code[target] in (JA, JF) \
                    and self.get_target(target) == self.get_target(next):
                self.fixed_jumps[pos] = pre[next]
                return
        #don't add a struct for a while test, it's already taken care of
        if pos in self.ignore_if:
            return
        if code[pre[rtarget]] == JA and pre[rtarget] in self.stmts \
                and pre[rtarget] != pos and pre[pre[rtarget]] != pos \
                and not (code[rtarget] == JA and code[rtarget+3] == POP_BLOCK and code[pre[pre[rtarget]]] != JA):
            rtarget = pre[rtarget]
        #does the if jump just beyond a jump op, then this is probably an if statement
        if code[pre[rtarget]] in (JA, JF):
            if_end = self.get_target(pre[rtarget])
            #is this a loop not an if?
            if (if_end < pre[rtarget]) and (code[pre[if_end]] == SETUP_LOOP):
                if(if_end > start):
                    return
            end = self.restrict_to_parent(if_end, parent)
            self.structs.append({'type': 'if-then',
                                 'start': start,
                                 'end': pre[rtarget]})
            self.not_continue.add(pre[rtarget])
            if rtarget < end:
                self.structs.append({'type': 'if-else',
                                     'start': rtarget,
                                     'end': end})
        elif code[pre[rtarget]] == RETURN_VALUE:
            self.structs.append({'type': 'if-then',
                                 'start': start,
                                 'end': rtarget})
            self.return_end_ifs.add(pre[rtarget])
def find_jump_targets(self, code):
    """
    Detect all offsets in a byte code which are jump targets.

    Return the list of offsets.

    This procedure is modelled after dis.findlables(), but here
    for each target the number of jumps are counted.

    Side effects: (re)initializes self.structs, self.loops,
    self.fixed_jumps, self.ignore_if, self.not_continue and
    self.return_end_ifs, and runs detect_structure() over every
    instruction.  Returns a dict mapping target offset -> list of
    offsets that jump there.
    """
    hasjrel = dis.hasjrel
    hasjabs = dis.hasjabs
    n = len(code)
    self.structs = [{'type': 'root',
                     'start': 0,
                     'end': n-1}]
    self.loops = [] ## All loop entry points
    self.fixed_jumps = {} ## Map fixed jumps to their real destination
    self.ignore_if = set()
    self.build_stmt_indices()
    self.not_continue = set()
    self.return_end_ifs = set()
    targets = {}
    for i in self.op_range(0, n):
        op = code[i]
        ## Determine structures and fix jumps for 2.3+
        self.detect_structure(i, op)
        if op >= HAVE_ARGUMENT:
            # a fixed jump recorded by detect_structure() overrides the
            # destination encoded in the bytecode
            label = self.fixed_jumps.get(i)
            oparg = self.get_argument(i)
            if label is None:
                if op in hasjrel and op != FOR_ITER:
                    label = i + 3 + oparg
                #elif op in hasjabs:  # absolute jumps: not handled here
                    #if op in (PJIF, PJIT):  # OR-pop handling still to do
                        #if (oparg > i):
                            #label = oparg
            if label is not None and label != -1:
                targets[label] = targets.get(label, []) + [i]
        elif op == END_FINALLY and i in self.fixed_jumps:
            label = self.fixed_jumps[i]
            targets[label] = targets.get(label, []) + [i]
    return targets

View File

@@ -0,0 +1,797 @@
# Copyright (c) 1999 John Aycock
# Copyright (c) 2000-2002 by hartmut Goebel <hartmut@goebel.noris.de>
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
#
# See main module for license.
#
__all__ = ['Token', 'Scanner', 'getscanner']
import types
import disas as dis
from collections import namedtuple
from array import array
from operator import itemgetter
from Scanner import Token, Code
class Scanner:
    def __init__(self, version):
        """Create a scanner for byte-code of the given Python *version*.

        The opcode tables for that version are loaded via dis.setVersion()
        and then injected into this module's global namespace (e.g.
        HAVE_ARGUMENT and short aliases such as JA/JF/PJIF/PJIT), so the
        analysis methods below can refer to opcodes by name.
        """
        self.version = version
        self.resetTokenClass()
        dis.setVersion(version)
        globals().update({'HAVE_ARGUMENT': dis.HAVE_ARGUMENT})
        # opcode names such as SLICE+0 are not valid identifiers: map '+' to '_'
        globals().update({k.replace('+','_'):v for (k,v) in dis.opmap.items()})
        # short aliases for the most heavily used jump opcodes
        globals().update({'PJIF': dis.opmap['POP_JUMP_IF_FALSE']})
        globals().update({'PJIT': dis.opmap['POP_JUMP_IF_TRUE']})
        globals().update({'JA': dis.opmap['JUMP_ABSOLUTE']})
        globals().update({'JF': dis.opmap['JUMP_FORWARD']})
        self.JUMP_OPs = map(lambda op: dis.opname[op],
                            dis.hasjrel + dis.hasjabs)
def setShowAsm(self, showasm, out=None):
self.showasm = showasm
self.out = out
    def setTokenClass(self, tokenClass):
        # tokenClass must be an old-style class (like Scanner.Token)
        assert type(tokenClass) == types.ClassType
        self.Token = tokenClass
    def resetTokenClass(self):
        # restore the default Token class imported from Scanner.py
        self.setTokenClass(Token)
    def disassemble(self, co, classname=None):
        """
        Disassemble a code object, returning a list of 'Token'.
        The main part of this procedure is modelled after
        dis.disassemble().

        Also returns a 'customize' dict mapping argument-dependent
        opcode names (e.g. CALL_FUNCTION_2) to their argument, which the
        parser uses to extend its grammar.
        """
        rv = []
        customize = {}
        Token = self.Token # shortcut
        self.code = code = array('B', co.co_code)
        n = len(code)
        self.prev = [0]
        # self.prev maps every byte address to the start offset of the
        # instruction containing it (argument bytes point back at the opcode)
        for i in self.op_range(0, n):
            op = code[i]
            self.prev.append(i)
            if op >= HAVE_ARGUMENT:
                self.prev.append(i)
                self.prev.append(i)
        self.lines = []
        linetuple = namedtuple('linetuple', ['l_no', 'next'])
        j = 0
        # linestarts: (byte offset, source line number) pairs, one per new line
        linestarts = list(dis.findlinestarts(co))
        linestartoffsets = {a for (a, _) in linestarts}
        (prev_start_byte, prev_line_no) = linestarts[0]
        # self.lines[i] gives, for byte offset i, its source line number and
        # the offset where the next source line begins
        for (start_byte, line_no) in linestarts[1:]:
            while j < start_byte:
                self.lines.append(linetuple(prev_line_no, start_byte))
                j += 1
            last_op = code[self.prev[start_byte]]
            (prev_start_byte, prev_line_no) = (start_byte, line_no)
        while j < n:
            self.lines.append(linetuple(prev_line_no, n))
            j+=1
        # cf maps each jump-target offset to the list of offsets jumping to it
        cf = self.find_jump_targets(code)
        if classname:
            # strip private-name mangling so generated source reads naturally
            classname = '_' + classname.lstrip('_') + '__'
            def unmangle(name):
                if name.startswith(classname) and name[-2:] != '__':
                    return name[len(classname) - 2:]
                return name
            free = [ unmangle(name) for name in (co.co_cellvars + co.co_freevars) ]
            names = [ unmangle(name) for name in co.co_names ]
            varnames = [ unmangle(name) for name in co.co_varnames ]
        else:
            free = co.co_cellvars + co.co_freevars
            names = co.co_names
            varnames = co.co_varnames
        # mark PRINT_ITEM/PRINT_NEWLINE that continue a print statement on
        # the same source line with _CONT pseudo-opcodes
        last_stmt = self.next_stmt[0]
        i = self.next_stmt[last_stmt]
        replace = {}
        while i < n-1:
            if self.lines[last_stmt].next > i:
                if code[last_stmt] == PRINT_ITEM:
                    if code[i] == PRINT_ITEM:
                        replace[i] = 'PRINT_ITEM_CONT'
                    elif code[i] == PRINT_NEWLINE:
                        replace[i] = 'PRINT_NEWLINE_CONT'
            last_stmt = i
            i = self.next_stmt[i]
        # similarly mark a second IMPORT_NAME on the same line (import a, b)
        imports = self.all_instr(0, n, (IMPORT_NAME, IMPORT_FROM, IMPORT_STAR))
        if len(imports) > 1:
            last_import = imports[0]
            for i in imports[1:]:
                if self.lines[last_import].next > i:
                    if code[last_import] == IMPORT_NAME == code[i]:
                        replace[i] = 'IMPORT_NAME_CONT'
                last_import = i
        extended_arg = 0
        for offset in self.op_range(0, n):
            # emit a COME_FROM pseudo-token for every jump landing here
            if offset in cf:
                k = 0
                for j in cf[offset]:
                    rv.append(Token('COME_FROM', None, repr(j),
                                    offset="%s_%d" % (offset, k) ))
                    k += 1
            op = code[offset]
            opname = dis.opname[op]
            oparg = None; pattr = None
            if op >= HAVE_ARGUMENT:
                oparg = code[offset+1] + code[offset+2] * 256 + extended_arg
                extended_arg = 0
                if op == dis.EXTENDED_ARG:
                    extended_arg = oparg * 65536L
                    continue
                if op in dis.hasconst:
                    const = co.co_consts[oparg]
                    if type(const) == types.CodeType:
                        oparg = const
                        if const.co_name == '<lambda>':
                            assert opname == 'LOAD_CONST'
                            opname = 'LOAD_LAMBDA'
                        elif const.co_name == '<genexpr>':
                            opname = 'LOAD_GENEXPR'
                        elif const.co_name == '<dictcomp>':
                            opname = 'LOAD_DICTCOMP'
                        elif const.co_name == '<setcomp>':
                            opname = 'LOAD_SETCOMP'
                        # verify uses 'pattr' for comparism, since 'attr'
                        # now holds Code(const) and thus can not be used
                        # for comparism (todo: think about changing this)
                        #pattr = 'code_object @ 0x%x %s->%s' %\
                        # (id(const), const.co_filename, const.co_name)
                        pattr = '<code_object ' + const.co_name + '>'
                    else:
                        pattr = const
                elif op in dis.hasname:
                    pattr = names[oparg]
                elif op in dis.hasjrel:
                    pattr = repr(offset + 3 + oparg)
                elif op in dis.hasjabs:
                    pattr = repr(oparg)
                elif op in dis.haslocal:
                    pattr = varnames[oparg]
                elif op in dis.hascompare:
                    pattr = dis.cmp_op[oparg]
                elif op in dis.hasfree:
                    pattr = free[oparg]
            # fold the argument into the opcode name for variadic opcodes so
            # the parser can key grammar rules off e.g. CALL_FUNCTION_2
            if op in (BUILD_LIST, BUILD_TUPLE, BUILD_SET, BUILD_SLICE,
                            UNPACK_SEQUENCE,
                            MAKE_FUNCTION, CALL_FUNCTION, MAKE_CLOSURE,
                            CALL_FUNCTION_VAR, CALL_FUNCTION_KW,
                            CALL_FUNCTION_VAR_KW, DUP_TOPX,
                            ):
                # CE - Hack for >= 2.5
                #      Now all values loaded via LOAD_CLOSURE are packed into
                #      a tuple before calling MAKE_CLOSURE.
                if op == BUILD_TUPLE and \
                    code[offset-3] == LOAD_CLOSURE:
                    continue
                else:
                    opname = '%s_%d' % (opname, oparg)
                    if op != BUILD_SLICE:
                        customize[opname] = oparg
            elif op == JA:
                target = self.get_target(offset)
                if target < offset:
                    # backward absolute jump: either a loop back-edge or a
                    # 'continue' statement
                    if offset in self.stmts and code[offset+3] not in (END_FINALLY, POP_BLOCK) \
                        and offset not in self.not_continue:
                        opname = 'CONTINUE'
                    else:
                        opname = 'JUMP_BACK'
            elif op == LOAD_GLOBAL:
                try:
                    if pattr == 'AssertionError' and rv and rv[-1] == 'POP_JUMP_IF_TRUE':
                        opname = 'LOAD_ASSERT'
                except AttributeError:
                    pass
            elif op == RETURN_VALUE:
                if offset in self.return_end_ifs:
                    opname = 'RETURN_END_IF'
            if offset not in replace:
                rv.append(Token(opname, oparg, pattr, offset, linestart = offset in linestartoffsets))
            else:
                rv.append(Token(replace[offset], oparg, pattr, offset, linestart = offset in linestartoffsets))
        if self.showasm:
            out = self.out # shortcut
            for t in rv:
                print >>out, t
            print >>out
        return rv, customize
def get_target(self, pos, op=None):
if op is None:
op = self.code[pos]
target = self.code[pos+1] + self.code[pos+2] * 256
if op in dis.hasjrel:
target += pos + 3
return target
def first_instr(self, start, end, instr, target=None, exact=True):
"""
Find the first <instr> in the block from start to end.
<instr> is any python bytecode instruction or a list of opcodes
If <instr> is an opcode with a target (like a jump), a target
destination can be specified which must match precisely if exact
is True, or if exact is False, the instruction which has a target
closest to <target> will be returned.
Return index to it or None if not found.
"""
code = self.code
assert(start>=0 and end<=len(code))
try: None in instr
except: instr = [instr]
pos = None
distance = len(code)
for i in self.op_range(start, end):
op = code[i]
if op in instr:
if target is None:
return i
dest = self.get_target(i, op)
if dest == target:
return i
elif not exact:
_distance = abs(target - dest)
if _distance < distance:
distance = _distance
pos = i
return pos
def last_instr(self, start, end, instr, target=None, exact=True):
"""
Find the last <instr> in the block from start to end.
<instr> is any python bytecode instruction or a list of opcodes
If <instr> is an opcode with a target (like a jump), a target
destination can be specified which must match precisely if exact
is True, or if exact is False, the instruction which has a target
closest to <target> will be returned.
Return index to it or None if not found.
"""
code = self.code
if not (start>=0 and end<=len(code)):
return None
try: None in instr
except: instr = [instr]
pos = None
distance = len(code)
for i in self.op_range(start, end):
op = code[i]
if op in instr:
if target is None:
pos = i
else:
dest = self.get_target(i, op)
if dest == target:
distance = 0
pos = i
elif not exact:
_distance = abs(target - dest)
if _distance <= distance:
distance = _distance
pos = i
return pos
def all_instr(self, start, end, instr, target=None, include_beyond_target=False):
"""
Find all <instr> in the block from start to end.
<instr> is any python bytecode instruction or a list of opcodes
If <instr> is an opcode with a target (like a jump), a target
destination can be specified which must match precisely.
Return a list with indexes to them or [] if none found.
"""
code = self.code
assert(start>=0 and end<=len(code))
try: None in instr
except: instr = [instr]
result = []
for i in self.op_range(start, end):
op = code[i]
if op in instr:
if target is None:
result.append(i)
else:
t = self.get_target(i, op)
if include_beyond_target and t >= target:
result.append(i)
elif t == target:
result.append(i)
return result
def op_size(self, op):
if op < HAVE_ARGUMENT:
return 1
else:
return 3
def op_range(self, start, end):
while start < end:
yield start
start += self.op_size(self.code[start])
    def build_stmt_indices(self):
        """Compute statement boundaries for the current code object.

        Populates self.stmts (set of offsets that start a statement) and
        self.next_stmt (list mapping every offset to the offset of the
        next statement start).
        """
        code = self.code
        start = 0;
        end = len(code)
        # opcodes that by themselves terminate/form a statement
        stmt_opcodes = {
            SETUP_LOOP, BREAK_LOOP, CONTINUE_LOOP,
            SETUP_FINALLY, END_FINALLY, SETUP_EXCEPT, SETUP_WITH,
            POP_BLOCK, STORE_FAST, DELETE_FAST, STORE_DEREF,
            STORE_GLOBAL, DELETE_GLOBAL, STORE_NAME, DELETE_NAME,
            STORE_ATTR, DELETE_ATTR, STORE_SUBSCR, DELETE_SUBSCR,
            RETURN_VALUE, RAISE_VARARGS, POP_TOP,
            PRINT_EXPR, PRINT_ITEM, PRINT_NEWLINE, PRINT_ITEM_TO, PRINT_NEWLINE_TO,
            STORE_SLICE_0, STORE_SLICE_1, STORE_SLICE_2, STORE_SLICE_3,
            DELETE_SLICE_0, DELETE_SLICE_1, DELETE_SLICE_2, DELETE_SLICE_3,
            JUMP_ABSOLUTE, EXEC_STMT,
        }
        # two-opcode sequences produced by expression statements like
        # 'if x: pass' (conditional jump immediately followed by a jump)
        stmt_opcode_seqs = [(PJIF, JF), (PJIF, JA), (PJIT, JF), (PJIT, JA)]
        # opcodes that store into an assignment target
        designator_ops = {
            STORE_FAST, STORE_NAME, STORE_GLOBAL, STORE_DEREF, STORE_ATTR,
            STORE_SLICE_0, STORE_SLICE_1, STORE_SLICE_2, STORE_SLICE_3,
            STORE_SUBSCR, UNPACK_SEQUENCE, JA
        }
        prelim = self.all_instr(start, end, stmt_opcodes)
        stmts = self.stmts = set(prelim)
        pass_stmts = set()
        # add statements recognised by the two-opcode sequences above
        for seq in stmt_opcode_seqs:
            for i in self.op_range(start, end-(len(seq)+1)):
                match = True
                for elem in seq:
                    if elem != code[i]:
                        match = False
                        break
                    i += self.op_size(code[i])
                if match:
                    i = self.prev[i]
                    stmts.add(i)
                    pass_stmts.add(i)
        if pass_stmts:
            stmt_list = list(stmts)
            stmt_list.sort()
        else:
            stmt_list = prelim
        last_stmt = -1
        self.next_stmt = []
        slist = self.next_stmt = []
        i = 0
        # filter out offsets that look like statements but are really part
        # of a larger construct, then build the next-statement table
        for s in stmt_list:
            if code[s] == JA and s not in pass_stmts:
                target = self.get_target(s)
                if target > s or self.lines[last_stmt].l_no == self.lines[s].l_no:
                    stmts.remove(s)
                    continue
                j = self.prev[s]
                while code[j] == JA:
                    j = self.prev[j]
                if code[j] == LIST_APPEND: #list comprehension
                    stmts.remove(s)
                    continue
            elif code[s] == POP_TOP and code[self.prev[s]] == ROT_TWO:
                stmts.remove(s)
                continue
            elif code[s] in designator_ops:
                # a store fed directly by FOR_ITER is the loop variable
                # assignment, not a standalone statement
                j = self.prev[s]
                while code[j] in designator_ops:
                    j = self.prev[j]
                if code[j] == FOR_ITER:
                    stmts.remove(s)
                    continue
            last_stmt = s
            slist += [s] * (s-i)
            i = s
        slist += [len(code)] * (len(code)-len(slist))
def remove_mid_line_ifs(self, ifs):
filtered = []
for i in ifs:
if self.lines[i].l_no == self.lines[i+3].l_no:
if self.code[self.prev[self.lines[i].next]] in (PJIT, PJIF):
continue
filtered.append(i)
return filtered
def rem_or(self, start, end, instr, target=None, include_beyond_target=False):
"""
Find all <instr> in the block from start to end.
<instr> is any python bytecode instruction or a list of opcodes
If <instr> is an opcode with a target (like a jump), a target
destination can be specified which must match precisely.
Return a list with indexes to them or [] if none found.
"""
code = self.code
assert(start>=0 and end<=len(code))
try: None in instr
except: instr = [instr]
result = []
for i in self.op_range(start, end):
op = code[i]
if op in instr:
if target is None:
result.append(i)
else:
t = self.get_target(i, op)
if include_beyond_target and t >= target:
result.append(i)
elif t == target:
result.append(i)
pjits = self.all_instr(start, end, PJIT)
filtered = []
for pjit in pjits:
tgt = self.get_target(pjit)-3
for i in result:
if i <= pjit or i >= tgt:
filtered.append(i)
result = filtered
filtered = []
return result
    def next_except_jump(self, start):
        """
        Return the next jump that was generated by an except SomeException:
        construct in a try...except...else clause or None if not found.
        """
        # an 'except SomeException:' clause starts with a comparison whose
        # POP_JUMP_IF_FALSE (on the same source line) skips the handler body
        except_match = self.first_instr(start, self.lines[start].next, POP_JUMP_IF_FALSE)
        if except_match:
            jmp = self.prev[self.get_target(except_match)]
            self.ignore_if.add(except_match)
            return jmp
        # bare 'except:' -- scan forward for the END_FINALLY that closes this
        # handler, skipping over any nested SETUP_* blocks along the way
        count_END_FINALLY = 0
        count_SETUP_ = 0
        for i in self.op_range(start, len(self.code)):
            op = self.code[i]
            if op == END_FINALLY:
                if count_END_FINALLY == count_SETUP_:
                    assert self.code[self.prev[i]] in (JA, JF, RETURN_VALUE)
                    return self.prev[i]
                count_END_FINALLY += 1
            elif op in (SETUP_EXCEPT, SETUP_WITH, SETUP_FINALLY):
                count_SETUP_ += 1
def restrict_to_parent(self, target, parent):
"""Restrict pos to parent boundaries."""
if not (parent['start'] < target < parent['end']):
target = parent['end']
return target
    def detect_structure(self, pos, op=None):
        """
        Detect type of block structures and their boundaries to fix optimizied jumps
        in python2.3+

        Appends entries to self.structs and records jump corrections in
        self.fixed_jumps as a side effect.
        """
        # TODO: check the struct boundaries more precisely -Dan
        code = self.code
        # Ev remove this test and make op a mandatory argument -Dan
        if op is None:
            op = code[pos]
        ## Detect parent structure: the innermost already-known structure
        ## containing pos
        parent = self.structs[0]
        start = parent['start']
        end = parent['end']
        for s in self.structs:
            _start = s['start']
            _end = s['end']
            if (_start <= pos < _end) and (_start >= start and _end <= end):
                start = _start
                end = _end
                parent = s
        ## We need to know how many new structures were added in this run
        # NOTE(review): origStructCount is currently unused below
        origStructCount = len(self.structs)
        if op == SETUP_LOOP:
            #import pdb; pdb.set_trace()
            start = pos+3
            target = self.get_target(pos, op)
            end = self.restrict_to_parent(target, parent)
            if target != end:
                self.fixed_jumps[pos] = end
            (line_no, next_line_byte) = self.lines[pos]
            # the loop's back-edge: last JUMP_ABSOLUTE to the loop head
            jump_back = self.last_instr(start, end, JA,
                                        next_line_byte, False)
            if not jump_back: # loop suite ends in return. wtf right?
                jump_back = self.last_instr(start, end, RETURN_VALUE) + 1
                if not jump_back:
                    return
                if code[self.prev[next_line_byte]] not in (PJIF, PJIT):
                    loop_type = 'for'
                else:
                    loop_type = 'while'
                    self.ignore_if.add(self.prev[next_line_byte])
                target = next_line_byte
                end = jump_back + 3
            else:
                if self.get_target(jump_back) >= next_line_byte:
                    jump_back = self.last_instr(start, end, JA,
                                                start, False)
                if end > jump_back+4 and code[end] in (JF, JA):
                    if code[jump_back+4] in (JA, JF):
                        if self.get_target(jump_back+4) == self.get_target(end):
                            self.fixed_jumps[pos] = jump_back+4
                            end = jump_back+4
                elif target < pos:
                    self.fixed_jumps[pos] = jump_back+4
                    end = jump_back+4
                target = self.get_target(jump_back, JA)
                # a back-edge landing on FOR_ITER/GET_ITER marks a for loop
                if code[target] in (FOR_ITER, GET_ITER):
                    loop_type = 'for'
                else:
                    loop_type = 'while'
                    test = self.prev[next_line_byte]
                    if test == pos:
                        loop_type = 'while 1'
                    else:
                        self.ignore_if.add(test)
                        test_target = self.get_target(test)
                        if test_target > (jump_back+3):
                            jump_back = test_target
                self.loops.append(target)
            self.structs.append({'type': loop_type + '-loop',
                                 'start': target,
                                 'end': jump_back})
            if jump_back+3 != end:
                self.structs.append({'type': loop_type + '-else',
                                     'start': jump_back+3,
                                     'end': end})
        elif op == SETUP_EXCEPT:
            start = pos+3
            target = self.get_target(pos, op)
            end = self.restrict_to_parent(target, parent)
            if target != end:
                self.fixed_jumps[pos] = end
            #print target, end, parent
            ## Add the try block
            self.structs.append({'type': 'try',
                                 'start': start,
                                 'end': end-4})
            ## Now isolate the except and else blocks
            end_else = start_else = self.get_target(self.prev[end])
            ## Add the except blocks
            i = end
            while self.code[i] != END_FINALLY:
                jmp = self.next_except_jump(i)
                if self.code[jmp] == RETURN_VALUE:
                    self.structs.append({'type': 'except',
                                         'start': i,
                                         'end': jmp+1})
                    i = jmp + 1
                else:
                    if self.get_target(jmp) != start_else:
                        end_else = self.get_target(jmp)
                    if self.code[jmp] == JF:
                        self.fixed_jumps[jmp] = -1
                    self.structs.append({'type': 'except',
                                         'start': i,
                                         'end': jmp})
                    i = jmp + 3
            ## Add the try-else block
            if end_else != start_else:
                r_end_else = self.restrict_to_parent(end_else, parent)
                self.structs.append({'type': 'try-else',
                                     'start': i+1,
                                     'end': r_end_else})
                self.fixed_jumps[i] = r_end_else
            else:
                self.fixed_jumps[i] = i+1
        elif op in (PJIF, PJIT):
            start = pos+3
            target = self.get_target(pos, op)
            rtarget = self.restrict_to_parent(target, parent)
            pre = self.prev
            if target != rtarget and parent['type'] == 'and/or':
                self.fixed_jumps[pos] = rtarget
                return
            #does this jump to right after another cond jump?
            # if so, it's part of a larger conditional
            if (code[pre[target]] in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP,
                                      PJIF, PJIT)) and (target > pos):
                self.fixed_jumps[pos] = pre[target]
                self.structs.append({'type': 'and/or',
                                     'start': start,
                                     'end': pre[target]})
                return
            # is this an if and
            if op == PJIF:
                # other jumps-to-same-target on the same statement mean this
                # is one operand of an 'and' chain, not a standalone if
                match = self.rem_or(start, self.next_stmt[pos], PJIF, target)
                match = self.remove_mid_line_ifs(match)
                if match:
                    if code[pre[rtarget]] in (JF, JA) \
                            and pre[rtarget] not in self.stmts \
                            and self.restrict_to_parent(self.get_target(pre[rtarget]), parent) == rtarget:
                        if code[pre[pre[rtarget]]] == JA \
                                and self.remove_mid_line_ifs([pos]) \
                                and target == self.get_target(pre[pre[rtarget]]) \
                                and (pre[pre[rtarget]] not in self.stmts or self.get_target(pre[pre[rtarget]]) > pre[pre[rtarget]])\
                                and 1 == len(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], (PJIF, PJIT), target))):
                            pass
                        elif code[pre[pre[rtarget]]] == RETURN_VALUE \
                                and self.remove_mid_line_ifs([pos]) \
                                and 1 == (len(set(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], \
                                                             (PJIF, PJIT), target))) \
                                              | set(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], \
                                                             (PJIF, PJIT, JA), pre[rtarget], True))))):
                            pass
                        else:
                            fix = None
                            jump_ifs = self.all_instr(start, self.next_stmt[pos], PJIF)
                            last_jump_good = True
                            for j in jump_ifs:
                                if target == self.get_target(j):
                                    if self.lines[j].next == j+3 and last_jump_good:
                                        fix = j
                                        break
                                    else:
                                        last_jump_good = False
                            self.fixed_jumps[pos] = fix or match[-1]
                            return
                    else:
                        self.fixed_jumps[pos] = match[-1]
                        return
            else: # op == PJIT
                next = self.next_stmt[pos]
                if pre[next] == pos:
                    pass
                elif code[next] in (JF, JA) and target == self.get_target(next):
                    if code[pre[next]] == PJIF:
                        if code[next] == JF or target != rtarget or code[pre[pre[rtarget]]] not in (JA, RETURN_VALUE):
                            self.fixed_jumps[pos] = pre[next]
                            return
                elif code[next] == JA and code[target] in (JA, JF) \
                        and self.get_target(target) == self.get_target(next):
                    self.fixed_jumps[pos] = pre[next]
                    return
            #don't add a struct for a while test, it's already taken care of
            if pos in self.ignore_if:
                return
            if code[pre[rtarget]] == JA and pre[rtarget] in self.stmts \
                    and pre[rtarget] != pos and pre[pre[rtarget]] != pos \
                    and not (code[rtarget] == JA and code[rtarget+3] == POP_BLOCK and code[pre[pre[rtarget]]] != JA):
                rtarget = pre[rtarget]
            #does the if jump just beyond a jump op, then this is probably an if statement
            if code[pre[rtarget]] in (JA, JF):
                if_end = self.get_target(pre[rtarget])
                #is this a loop not an if?
                if (if_end < pre[rtarget]) and (code[pre[if_end]] == SETUP_LOOP):
                    if(if_end > start):
                        return
                end = self.restrict_to_parent(if_end, parent)
                self.structs.append({'type': 'if-then',
                                     'start': start,
                                     'end': pre[rtarget]})
                self.not_continue.add(pre[rtarget])
                if rtarget < end:
                    self.structs.append({'type': 'if-else',
                                         'start': rtarget,
                                         'end': end})
            elif code[pre[rtarget]] == RETURN_VALUE:
                self.structs.append({'type': 'if-then',
                                     'start': start,
                                     'end': rtarget})
                self.return_end_ifs.add(pre[rtarget])
        elif op in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP):
            target = self.get_target(pos, op)
            if target > pos:
                unop_target = self.last_instr(pos, target, JF, target)
                if unop_target and code[unop_target+3] != ROT_TWO:
                    self.fixed_jumps[pos] = unop_target
                else:
                    self.fixed_jumps[pos] = self.restrict_to_parent(target, parent)
def find_jump_targets(self, code):
"""
Detect all offsets in a byte code which are jump targets.
Return the list of offsets.
This procedure is modelled after dis.findlables(), but here
for each target the number of jumps are counted.
"""
hasjrel = dis.hasjrel
hasjabs = dis.hasjabs
n = len(code)
self.structs = [{'type': 'root',
'start': 0,
'end': n-1}]
self.loops = [] ## All loop entry points
self.fixed_jumps = {} ## Map fixed jumps to their real destination
self.ignore_if = set()
self.build_stmt_indices()
self.not_continue = set()
self.return_end_ifs = set()
targets = {}
for i in self.op_range(0, n):
op = code[i]
## Determine structures and fix jumps for 2.3+
self.detect_structure(i, op)
if op >= HAVE_ARGUMENT:
label = self.fixed_jumps.get(i)
oparg = code[i+1] + code[i+2] * 256
if label is None:
if op in hasjrel and op != FOR_ITER:
label = i + 3 + oparg
elif op in hasjabs:
if op in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP):
if (oparg > i):
label = oparg
if label is not None and label != -1:
targets[label] = targets.get(label, []) + [i]
elif op == END_FINALLY and i in self.fixed_jumps:
label = self.fixed_jumps[i]
targets[label] = targets.get(label, []) + [i]
print self.structs
return targets

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,232 @@
# Copyright (c) 1999 John Aycock
# Copyright (c) 2000 by hartmut Goebel <hartmut@goebel.noris.de>
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# See the file 'CHANGES' for a list of changes
#
# NB. This is not a masterpiece of software, but became more like a hack.
# Probably a complete rewrite would be sensefull. hG/2000-12-27
#
import sys, types, os
import Walker, verify, magics

# deeply nested code objects recurse through the walker; raise the limit
sys.setrecursionlimit(5000)

# public API: the in-memory decompiler, the .pyc front-end and the CLI driver
# (the original listed 'uncompyle_file' twice and omitted 'uncompyle')
__all__ = ['uncompyle', 'uncompyle_file', 'main']
def _load_file(filename):
    """
    load a Python source file and compile it to byte-code
    _load_file(filename: string): code_object
    filename:      name of file containing Python source code
                   (normally a .py)
    code_object:   code_object compiled from this source code
    This function does NOT write any file!
    """
    fp = open(filename, 'rb')
    try:
        source = fp.read()+'\n'
        try:
            co = compile(source, filename, 'exec')
        except SyntaxError:
            print >> sys.stderr, '>>Syntax error in', filename
            raise
    finally:
        # close the handle even when compilation fails -- the original
        # leaked it on the re-raised SyntaxError
        fp.close()
    return co
def _load_module(filename):
"""
load a module without importing it
_load_module(filename: string): code_object
filename: name of file containing Python byte-code object
(normally a .pyc)
code_object: code_object from this file
"""
import magics, marshal
fp = open(filename, 'rb')
magic = fp.read(4)
try:
version = float(magics.versions[magic])
except KeyError:
raise ImportError, "Unknown magic number %s in %s" % (ord(magic[0])+256*ord(magic[1]), filename)
if (version > 2.7) or (version < 2.5):
raise ImportError, "This is a Python %s file! Only Python 2.5 to 2.7 files are supported." % version
#print version
fp.read(4) # timestamp
co = marshal.load(fp)
fp.close()
return version, co
def uncompyle(version, co, out=None, showasm=0, showast=0):
    """
    Disassemble the code object 'co' (produced by the given Python
    *version*, a float: 2.5, 2.6 or 2.7) and write equivalent source
    to the stream 'out' (default: stdout).
    """
    assert type(co) == types.CodeType
    # store final output stream for case of error
    __real_out = out or sys.stdout
    if co.co_filename:
        print >>__real_out, '#Embedded file name: %s' % co.co_filename
    # pick the scanner matching the byte-code version
    if version == 2.7:
        import Scanner27 as scan
    elif version == 2.6:
        import Scanner26 as scan
    elif version == 2.5:
        import Scanner25 as scan
    scanner = scan.Scanner(version)
    scanner.setShowAsm(showasm, out)
    tokens, customize = scanner.disassemble(co)
    #sys.exit(0)
    # Build AST from disassembly.
    walker = Walker.Walker(out, scanner, showast=showast)
    try:
        ast = walker.build_ast(tokens, customize)
    except Walker.ParserError, e : # parser failed, dump disassembly
        print >>__real_out, e
        raise
    del tokens # save memory
    # convert leading '__doc__ = "..." into doc string
    assert ast == 'stmts'
    try:
        if ast[0][0] == Walker.ASSIGN_DOC_STRING(co.co_consts[0]):
            walker.print_docstring('', co.co_consts[0])
            del ast[0]
        if ast[-1] == Walker.RETURN_NONE:
            ast.pop() # remove last node
            #todo: if empty, add 'pass'
    except:
        pass
    walker.mod_globs = Walker.find_globals(ast, set())
    walker.gen_source(ast, customize)
    # any remaining entries were declared global but never assigned
    for g in walker.mod_globs:
        walker.write('global %s ## Warning: Unused global\n' % g)
    if walker.ERROR:
        raise walker.ERROR
def uncompyle_file(filename, outstream=None, showasm=0, showast=0):
    """
    decompile Python byte-code file (.pyc)

    Reads the magic/version from the file and dispatches to the matching
    scanner via uncompyle().
    """
    version, co = _load_module(filename)
    uncompyle(version, co, outstream, showasm, showast)
    co = None  # drop the code-object reference promptly
#---- main -------

# report the process memory usage (for progress messages) where a
# /proc filesystem is available; elsewhere report nothing
if sys.platform.startswith('linux') and os.uname()[2][:2] == '2.':
    def __memUsage():
        # field 23 of /proc/self/stat is the process virtual memory size
        mi = open('/proc/self/stat', 'r')
        mu = mi.readline().split()[22]
        mi.close()
        return int(mu) / 1000000
else:
    def __memUsage():
        return ''
def main(in_base, out_base, files, codes, outfile=None,
showasm=0, showast=0, do_verify=0):
"""
in_base base directory for input files
out_base base directory for output files (ignored when
files list of filenames to be uncompyled (relative to src_base)
outfile write output to this filename (overwrites out_base)
For redirecting output to
- <filename> outfile=<filename> (out_base is ignored)
- files below out_base out_base=...
- stdout out_base=None, outfile=None
"""
def _get_outstream(outfile):
dir = os.path.dirname(outfile)
failed_file = outfile + '_failed'
if os.path.exists(failed_file): os.remove(failed_file)
try:
os.makedirs(dir)
except OSError:
pass
return open(outfile, 'w')
of = outfile
tot_files = okay_files = failed_files = verify_failed_files = 0
for code in codes:
version = sys.version[:3] # "2.5"
with open(code, "r") as f:
co = compile(f.read(), "", "exec")
uncompyle(sys.version[:3], co, sys.stdout, showasm=showasm, showast=showast)
for file in files:
infile = os.path.join(in_base, file)
#print >>sys.stderr, infile
if of: # outfile was given as parameter
outstream = _get_outstream(outfile)
elif out_base is None:
outstream = sys.stdout
else:
outfile = os.path.join(out_base, file) + '_dis'
outstream = _get_outstream(outfile)
#print >>sys.stderr, outfile
# try to decomyple the input file
try:
uncompyle_file(infile, outstream, showasm, showast)
tot_files += 1
except KeyboardInterrupt:
if outfile:
outstream.close()
os.remove(outfile)
raise
except:
failed_files += 1
sys.stderr.write("### Can't uncompyle %s\n" % infile)
if outfile:
outstream.close()
os.rename(outfile, outfile + '_failed')
import traceback
traceback.print_exc()
#raise
else: # uncompyle successfull
if outfile:
outstream.close()
if do_verify:
try:
verify.compare_code_with_srcfile(infile, outfile)
print "+++ okay decompyling", infile, __memUsage()
okay_files += 1
except verify.VerifyCmpError, e:
verify_failed_files += 1
os.rename(outfile, outfile + '_unverified')
print >>sys.stderr, "### Error Verifiying", file
print >>sys.stderr, e
else:
okay_files += 1
print "+++ okay decompyling", infile, __memUsage()
return (tot_files, okay_files, failed_files, verify_failed_files)

View File

@@ -0,0 +1,238 @@
"""Disassembler of Python byte code into mnemonics."""
import sys
import types
_have_code = (types.MethodType, types.FunctionType, types.CodeType, types.ClassType, type)
def dis(x=None):
    """Disassemble classes, methods, functions, or code.

    With no argument, disassemble the last traceback.
    """
    if x is None:
        distb()
        return
    # peel wrappers down to something carrying a code object
    if isinstance(x, types.InstanceType):
        x = x.__class__
    if hasattr(x, 'im_func'):
        x = x.im_func
    if hasattr(x, 'func_code'):
        x = x.func_code
    if hasattr(x, '__dict__'):
        # a class/module: disassemble each code-bearing attribute
        items = x.__dict__.items()
        items.sort()
        for name, x1 in items:
            if isinstance(x1, _have_code):
                print "Disassembly of %s:" % name
                try:
                    dis(x1)
                except TypeError, msg:
                    print "Sorry:", msg
                print
    elif hasattr(x, 'co_code'):
        disassemble(x)
    elif isinstance(x, str):
        disassemble_string(x)
    else:
        raise TypeError, \
              "don't know how to disassemble %s objects" % \
              type(x).__name__
def distb(tb=None):
    """Disassemble a traceback (default: last traceback)."""
    if tb is None:
        try:
            tb = sys.last_traceback
        except AttributeError:
            raise RuntimeError, "no last traceback to disassemble"
        # walk to the innermost frame, where the exception was raised
        while tb.tb_next: tb = tb.tb_next
    disassemble(tb.tb_frame.f_code, tb.tb_lasti)
def disassemble(co, lasti=-1):
    """Disassemble a code object.

    *lasti*, if >= 0, marks the instruction at that offset with '-->'.
    """
    code = co.co_code
    labels = findlabels(code)
    linestarts = dict(findlinestarts(co))
    n = len(code)
    i = 0
    extended_arg = 0
    free = None
    while i < n:
        c = code[i]
        op = ord(c)
        if i in linestarts:
            if i > 0:
                print
            print "%3d" % linestarts[i],
        else:
            print '   ',
        if i == lasti: print '-->',
        else: print '   ',
        if i in labels: print '>>',
        else: print '  ',
        print repr(i).rjust(4),
        print opname[op].ljust(20),
        i = i+1
        if op >= HAVE_ARGUMENT:
            # 16-bit little-endian argument, widened by any pending EXTENDED_ARG
            oparg = ord(code[i]) + ord(code[i+1])*256 + extended_arg
            extended_arg = 0
            i = i+2
            if op == EXTENDED_ARG:
                extended_arg = oparg*65536L
            print repr(oparg).rjust(5),
            if op in hasconst:
                print '(' + repr(co.co_consts[oparg]) + ')',
            elif op in hasname:
                print '(' + co.co_names[oparg] + ')',
            elif op in hasjrel:
                print '(to ' + repr(i + oparg) + ')',
            elif op in haslocal:
                print '(' + co.co_varnames[oparg] + ')',
            elif op in hascompare:
                print '(' + cmp_op[oparg] + ')',
            elif op in hasfree:
                if free is None:
                    free = co.co_cellvars + co.co_freevars
                print '(' + free[oparg] + ')',
        print
def disassemble_string(code, lasti=-1, varnames=None, names=None,
                       constants=None):
    """Disassemble a raw byte-code string.

    Without the varnames/names/constants tables only the numeric
    argument can be shown (printed as '(%d)').
    """
    labels = findlabels(code)
    n = len(code)
    i = 0
    while i < n:
        c = code[i]
        op = ord(c)
        if i == lasti: print '-->',
        else: print '   ',
        if i in labels: print '>>',
        else: print '  ',
        print repr(i).rjust(4),
        print opname[op].ljust(15),
        i = i+1
        if op >= HAVE_ARGUMENT:
            oparg = ord(code[i]) + ord(code[i+1])*256
            i = i+2
            print repr(oparg).rjust(5),
            if op in hasconst:
                if constants:
                    print '(' + repr(constants[oparg]) + ')',
                else:
                    print '(%d)'%oparg,
            elif op in hasname:
                if names is not None:
                    print '(' + names[oparg] + ')',
                else:
                    print '(%d)'%oparg,
            elif op in hasjrel:
                print '(to ' + repr(i + oparg) + ')',
            elif op in haslocal:
                if varnames:
                    print '(' + varnames[oparg] + ')',
                else:
                    print '(%d)' % oparg,
            elif op in hascompare:
                print '(' + cmp_op[oparg] + ')',
        print
# alias mirroring the stdlib 'dis' module's public API
disco = disassemble # XXX For backwards compatibility
def findlabels(code):
    """Detect all offsets in a byte code which are jump targets.
    Return the list of offsets.
    """
    labels = []
    n = len(code)
    i = 0
    while i < n:
        op = ord(code[i])
        i += 1
        if op < HAVE_ARGUMENT:
            continue
        oparg = ord(code[i]) + ord(code[i+1])*256
        i += 2
        if op in hasjrel:
            label = i + oparg
        elif op in hasjabs:
            label = oparg
        else:
            continue
        if label >= 0 and label not in labels:
            labels.append(label)
    return labels
def findlinestarts(code):
    """Find the offsets in a byte code which are start of lines in the source.
    Generate pairs (offset, lineno) as described in Python/compile.c.
    """
    # co_lnotab packs (byte_increment, line_increment) pairs as characters
    offset_deltas = [ord(ch) for ch in code.co_lnotab[0::2]]
    line_deltas = [ord(ch) for ch in code.co_lnotab[1::2]]
    addr = 0
    lineno = code.co_firstlineno
    emitted = None  # last line number actually yielded
    for off_delta, line_delta in zip(offset_deltas, line_deltas):
        if off_delta:
            if lineno != emitted:
                yield (addr, lineno)
                emitted = lineno
            addr += off_delta
        lineno += line_delta
    if lineno != emitted:
        yield (addr, lineno)
def setVersion(version):
    """Load the opcode tables for the given Python *version* (2.5, 2.6
    or 2.7, as a float) and publish them as globals of this module
    (opname, opmap, hasjrel, HAVE_ARGUMENT, ...).

    Raises ValueError for an unsupported version (the original silently
    left 'opcodyn' unbound and crashed later with a NameError).
    """
    if version == 2.7:
        import uncompyle2.opcode.opcode_27 as opcodyn
    elif version == 2.6:
        import uncompyle2.opcode.opcode_26 as opcodyn
    elif version == 2.5:
        import uncompyle2.opcode.opcode_25 as opcodyn
    else:
        raise ValueError("unsupported Python byte-code version: %r" % (version,))
    globals().update({'cmp_op': opcodyn.cmp_op})
    globals().update({'hasconst': opcodyn.hasconst})
    globals().update({'hasname': opcodyn.hasname})
    globals().update({'hasjrel': opcodyn.hasjrel})
    globals().update({'hasjabs': opcodyn.hasjabs})
    globals().update({'haslocal': opcodyn.haslocal})
    globals().update({'hascompare': opcodyn.hascompare})
    globals().update({'hasfree': opcodyn.hasfree})
    globals().update({'opname': opcodyn.opname})
    globals().update({'opmap': opcodyn.opmap})
    globals().update({'HAVE_ARGUMENT': opcodyn.HAVE_ARGUMENT})
    globals().update({'EXTENDED_ARG': opcodyn.EXTENDED_ARG})
def _test():
    """Simple test program to disassemble a file."""
    fn = None
    if sys.argv[1:]:
        if sys.argv[2:]:
            sys.stderr.write("usage: python dis.py [-|file]\n")
            sys.exit(2)
        fn = sys.argv[1]
        if fn in ("", "-"):
            fn = None
    # No filename (or "-") means read the source from stdin.
    if fn is None:
        stream = sys.stdin
    else:
        stream = open(fn)
    source = stream.read()
    if fn is None:
        fn = "<stdin>"
    else:
        stream.close()
    dis(compile(source, fn, "exec"))
if __name__ == "__main__":
    _test()

View File

@@ -0,0 +1,66 @@
import struct
__all__ = ['magics', 'versions']
def __build_magic(magic):
    # Pack the 16-bit magic number followed by '\r\n' into the 4-byte
    # header that starts every .pyc file (Python 2 str result).
    return struct.pack('Hcc', magic, '\r', '\n')
def __by_version(magics):
    """Invert a {magic: version} mapping into {version: magic}."""
    return dict((version, magic) for magic, version in magics.items())
# Maps the 4-byte .pyc magic header to the Python version string it
# belongs to; `magics` below is the inverse (version -> magic).
versions = {
    # taken from Python/import.c
    # magic, version
    __build_magic(20121): '1.5', #1.5, 1.5.1, 1.5.2
    __build_magic(50428): '1.6', #1.6
    __build_magic(50823): '2.0', #2.0, 2.0.1
    __build_magic(60202): '2.1', #2.1, 2.1.1, 2.1.2
    __build_magic(60717): '2.2', #2.2
    __build_magic(62011): '2.3', #2.3a0
    __build_magic(62021): '2.3', #2.3a0
    __build_magic(62041): '2.4', #2.4a0
    __build_magic(62051): '2.4', #2.4a3
    __build_magic(62061): '2.4', #2.4b1
    __build_magic(62071): '2.5', #2.5a0
    __build_magic(62081): '2.5', #2.5a0 (ast-branch)
    __build_magic(62091): '2.5', #2.5a0 (with)
    __build_magic(62092): '2.5', #2.5a0 (changed WITH_CLEANUP opcode)
    __build_magic(62101): '2.5', #2.5b3 (fix wrong code: for x, in ...)
    __build_magic(62111): '2.5', #2.5b3 (fix wrong code: x += yield)
    __build_magic(62121): '2.5', #2.5c1 (fix wrong lnotab with for loops and
                                 # storing constants that should have been removed)
    __build_magic(62131): '2.5', #2.5c2 (fix wrong code: for x, in ... in listcomp/genexp)
    __build_magic(62151): '2.6', #2.6a0 (peephole optimizations & STORE_MAP)
    __build_magic(62161): '2.6', #2.6a1 (WITH_CLEANUP optimization)
    __build_magic(62171): '2.7', #2.7a0 (optimize list comprehensions/change LIST_APPEND)
    __build_magic(62181): '2.7', #2.7a0 (optimize conditional branches:
                                 # introduce POP_JUMP_IF_FALSE and POP_JUMP_IF_TRUE)
    __build_magic(62191): '2.7', #2.7a0 (introduce SETUP_WITH)
    __build_magic(62201): '2.7', #2.7a0 (introduce BUILD_SET)
    __build_magic(62211): '2.7'  #2.7a0 (introduce MAP_ADD and SET_ADD)
}
# Inverse lookup: version string -> magic bytes (last magic wins per version).
magics = __by_version(versions)
def __show(text, magic):
    # Debug helper: print the magic both byte-by-byte and as
    # (16-bit number, '\r', '\n').
    print text, struct.unpack('BBBB', magic), \
        struct.unpack('HBB', magic)
def test():
import imp
magic_20 = by_version['2.0']
current = imp.get_magic()
current_version = magics[current]
magic_current = by_version[ current_version ]
print type(magic_20), len(magic_20), repr(magic_20)
print
print 'This Python interpreter has version', current_version
__show('imp.get_magic():\t', current),
__show('magic[current_version]:\t', magic_current)
__show('magic_20:\t\t', magic_20)
if __name__ == '__main__':
test()

View File

@@ -0,0 +1,188 @@
"""
opcode module - potentially shared between dis and other modules which
operate on bytecodes (e.g. peephole optimizers).
"""
__all__ = ["cmp_op", "hasconst", "hasname", "hasjrel", "hasjabs",
"haslocal", "hascompare", "hasfree", "opname", "opmap",
"HAVE_ARGUMENT", "EXTENDED_ARG"]
# Names of comparison operators, indexed by COMPARE_OP's argument.
cmp_op = ('<', '<=', '==', '!=', '>', '>=', 'in', 'not in', 'is',
        'is not', 'exception match', 'BAD')
# Opcode classification lists, populated by the def_op helpers below.
hasconst = []
hasname = []
hasjrel = []
hasjabs = []
haslocal = []
hascompare = []
hasfree = []
opmap = {}
opname = [''] * 256
# Placeholder names like '<3>' for undefined opcodes.  Use '<%r>' % (op,)
# for consistency with the other opcode_2x modules instead of the
# deprecated backtick repr syntax.
for op in range(256): opname[op] = '<%r>' % (op,)
del op
def def_op(name, op):
    # Register an opcode: map number -> mnemonic and mnemonic -> number.
    opname[op] = name
    opmap[name] = op
def name_op(name, op):
    # Opcode whose argument is an index into the co_names list.
    def_op(name, op)
    hasname.append(op)
def jrel_op(name, op):
    # Opcode whose argument is a relative jump target.
    def_op(name, op)
    hasjrel.append(op)
def jabs_op(name, op):
    # Opcode whose argument is an absolute jump target.
    def_op(name, op)
    hasjabs.append(op)
# Instruction opcodes for compiled code
def_op('STOP_CODE', 0)
def_op('POP_TOP', 1)
def_op('ROT_TWO', 2)
def_op('ROT_THREE', 3)
def_op('DUP_TOP', 4)
def_op('ROT_FOUR', 5)
def_op('UNARY_POSITIVE', 10)
def_op('UNARY_NEGATIVE', 11)
def_op('UNARY_NOT', 12)
def_op('UNARY_CONVERT', 13)
def_op('UNARY_INVERT', 15)
def_op('BINARY_POWER', 19)
def_op('BINARY_MULTIPLY', 20)
def_op('BINARY_DIVIDE', 21)
def_op('BINARY_MODULO', 22)
def_op('BINARY_ADD', 23)
def_op('BINARY_SUBTRACT', 24)
def_op('BINARY_SUBSCR', 25)
def_op('BINARY_FLOOR_DIVIDE', 26)
def_op('BINARY_TRUE_DIVIDE', 27)
def_op('INPLACE_FLOOR_DIVIDE', 28)
def_op('INPLACE_TRUE_DIVIDE', 29)
def_op('SLICE+0', 30)
def_op('SLICE+1', 31)
def_op('SLICE+2', 32)
def_op('SLICE+3', 33)
def_op('STORE_SLICE+0', 40)
def_op('STORE_SLICE+1', 41)
def_op('STORE_SLICE+2', 42)
def_op('STORE_SLICE+3', 43)
def_op('DELETE_SLICE+0', 50)
def_op('DELETE_SLICE+1', 51)
def_op('DELETE_SLICE+2', 52)
def_op('DELETE_SLICE+3', 53)
def_op('INPLACE_ADD', 55)
def_op('INPLACE_SUBTRACT', 56)
def_op('INPLACE_MULTIPLY', 57)
def_op('INPLACE_DIVIDE', 58)
def_op('INPLACE_MODULO', 59)
def_op('STORE_SUBSCR', 60)
def_op('DELETE_SUBSCR', 61)
def_op('BINARY_LSHIFT', 62)
def_op('BINARY_RSHIFT', 63)
def_op('BINARY_AND', 64)
def_op('BINARY_XOR', 65)
def_op('BINARY_OR', 66)
def_op('INPLACE_POWER', 67)
def_op('GET_ITER', 68)
def_op('PRINT_EXPR', 70)
def_op('PRINT_ITEM', 71)
def_op('PRINT_NEWLINE', 72)
def_op('PRINT_ITEM_TO', 73)
def_op('PRINT_NEWLINE_TO', 74)
def_op('INPLACE_LSHIFT', 75)
def_op('INPLACE_RSHIFT', 76)
def_op('INPLACE_AND', 77)
def_op('INPLACE_XOR', 78)
def_op('INPLACE_OR', 79)
def_op('BREAK_LOOP', 80)
def_op('LOAD_LOCALS', 82)
def_op('RETURN_VALUE', 83)
def_op('IMPORT_STAR', 84)
def_op('EXEC_STMT', 85)
def_op('YIELD_VALUE', 86)
def_op('POP_BLOCK', 87)
def_op('END_FINALLY', 88)
def_op('BUILD_CLASS', 89)
HAVE_ARGUMENT = 90 # Opcodes from here have an argument:
name_op('STORE_NAME', 90) # Index in name list
name_op('DELETE_NAME', 91) # ""
def_op('UNPACK_SEQUENCE', 92) # Number of tuple items
jrel_op('FOR_ITER', 93)
name_op('STORE_ATTR', 95) # Index in name list
name_op('DELETE_ATTR', 96) # ""
name_op('STORE_GLOBAL', 97) # ""
name_op('DELETE_GLOBAL', 98) # ""
def_op('DUP_TOPX', 99) # number of items to duplicate
def_op('LOAD_CONST', 100) # Index in const list
hasconst.append(100)
name_op('LOAD_NAME', 101) # Index in name list
def_op('BUILD_TUPLE', 102) # Number of tuple items
def_op('BUILD_LIST', 103) # Number of list items
def_op('BUILD_MAP', 104) # Always zero for now
name_op('LOAD_ATTR', 105) # Index in name list
def_op('COMPARE_OP', 106) # Comparison operator
hascompare.append(106)
name_op('IMPORT_NAME', 107) # Index in name list
name_op('IMPORT_FROM', 108) # Index in name list
jrel_op('JUMP_FORWARD', 110) # Number of bytes to skip
jrel_op('JUMP_IF_FALSE', 111) # ""
jrel_op('JUMP_IF_TRUE', 112) # ""
jabs_op('JUMP_ABSOLUTE', 113) # Target byte offset from beginning of code
name_op('LOAD_GLOBAL', 116) # Index in name list
jabs_op('CONTINUE_LOOP', 119) # Target address
jrel_op('SETUP_LOOP', 120) # Distance to target address
jrel_op('SETUP_EXCEPT', 121) # ""
jrel_op('SETUP_FINALLY', 122) # ""
def_op('LOAD_FAST', 124) # Local variable number
haslocal.append(124)
def_op('STORE_FAST', 125) # Local variable number
haslocal.append(125)
def_op('DELETE_FAST', 126) # Local variable number
haslocal.append(126)
def_op('RAISE_VARARGS', 130) # Number of raise arguments (1, 2, or 3)
def_op('CALL_FUNCTION', 131) # #args + (#kwargs << 8)
def_op('MAKE_FUNCTION', 132) # Number of args with default values
def_op('BUILD_SLICE', 133) # Number of items
def_op('MAKE_CLOSURE', 134)
def_op('LOAD_CLOSURE', 135)
hasfree.append(135)
def_op('LOAD_DEREF', 136)
hasfree.append(136)
def_op('STORE_DEREF', 137)
hasfree.append(137)
def_op('CALL_FUNCTION_VAR', 140) # #args + (#kwargs << 8)
def_op('CALL_FUNCTION_KW', 141) # #args + (#kwargs << 8)
def_op('CALL_FUNCTION_VAR_KW', 142) # #args + (#kwargs << 8)
def_op('EXTENDED_ARG', 143)
EXTENDED_ARG = 143
del def_op, name_op, jrel_op, jabs_op

View File

@@ -0,0 +1,190 @@
"""
opcode module - potentially shared between dis and other modules which
operate on bytecodes (e.g. peephole optimizers).
"""
__all__ = ["cmp_op", "hasconst", "hasname", "hasjrel", "hasjabs",
"haslocal", "hascompare", "hasfree", "opname", "opmap",
"HAVE_ARGUMENT", "EXTENDED_ARG"]
cmp_op = ('<', '<=', '==', '!=', '>', '>=', 'in', 'not in', 'is',
'is not', 'exception match', 'BAD')
hasconst = []
hasname = []
hasjrel = []
hasjabs = []
haslocal = []
hascompare = []
hasfree = []
opmap = {}
opname = [''] * 256
for op in range(256): opname[op] = '<%r>' % (op,)
del op
def def_op(name, op):
    # Register an opcode: map number -> mnemonic and mnemonic -> number.
    opname[op] = name
    opmap[name] = op
def name_op(name, op):
    # Opcode whose argument is an index into the co_names list.
    def_op(name, op)
    hasname.append(op)
def jrel_op(name, op):
    # Opcode whose argument is a relative jump target.
    def_op(name, op)
    hasjrel.append(op)
def jabs_op(name, op):
    # Opcode whose argument is an absolute jump target.
    def_op(name, op)
    hasjabs.append(op)
# Instruction opcodes for compiled code
def_op('STOP_CODE', 0)
def_op('POP_TOP', 1)
def_op('ROT_TWO', 2)
def_op('ROT_THREE', 3)
def_op('DUP_TOP', 4)
def_op('ROT_FOUR', 5)
def_op('NOP', 9)
def_op('UNARY_POSITIVE', 10)
def_op('UNARY_NEGATIVE', 11)
def_op('UNARY_NOT', 12)
def_op('UNARY_CONVERT', 13)
def_op('UNARY_INVERT', 15)
def_op('LIST_APPEND', 18)
def_op('BINARY_POWER', 19)
def_op('BINARY_MULTIPLY', 20)
def_op('BINARY_DIVIDE', 21)
def_op('BINARY_MODULO', 22)
def_op('BINARY_ADD', 23)
def_op('BINARY_SUBTRACT', 24)
def_op('BINARY_SUBSCR', 25)
def_op('BINARY_FLOOR_DIVIDE', 26)
def_op('BINARY_TRUE_DIVIDE', 27)
def_op('INPLACE_FLOOR_DIVIDE', 28)
def_op('INPLACE_TRUE_DIVIDE', 29)
def_op('SLICE+0', 30)
def_op('SLICE+1', 31)
def_op('SLICE+2', 32)
def_op('SLICE+3', 33)
def_op('STORE_SLICE+0', 40)
def_op('STORE_SLICE+1', 41)
def_op('STORE_SLICE+2', 42)
def_op('STORE_SLICE+3', 43)
def_op('DELETE_SLICE+0', 50)
def_op('DELETE_SLICE+1', 51)
def_op('DELETE_SLICE+2', 52)
def_op('DELETE_SLICE+3', 53)
def_op('INPLACE_ADD', 55)
def_op('INPLACE_SUBTRACT', 56)
def_op('INPLACE_MULTIPLY', 57)
def_op('INPLACE_DIVIDE', 58)
def_op('INPLACE_MODULO', 59)
def_op('STORE_SUBSCR', 60)
def_op('DELETE_SUBSCR', 61)
def_op('BINARY_LSHIFT', 62)
def_op('BINARY_RSHIFT', 63)
def_op('BINARY_AND', 64)
def_op('BINARY_XOR', 65)
def_op('BINARY_OR', 66)
def_op('INPLACE_POWER', 67)
def_op('GET_ITER', 68)
def_op('PRINT_EXPR', 70)
def_op('PRINT_ITEM', 71)
def_op('PRINT_NEWLINE', 72)
def_op('PRINT_ITEM_TO', 73)
def_op('PRINT_NEWLINE_TO', 74)
def_op('INPLACE_LSHIFT', 75)
def_op('INPLACE_RSHIFT', 76)
def_op('INPLACE_AND', 77)
def_op('INPLACE_XOR', 78)
def_op('INPLACE_OR', 79)
def_op('BREAK_LOOP', 80)
def_op('LOAD_LOCALS', 82)
def_op('RETURN_VALUE', 83)
def_op('IMPORT_STAR', 84)
def_op('EXEC_STMT', 85)
def_op('YIELD_VALUE', 86)
def_op('POP_BLOCK', 87)
def_op('END_FINALLY', 88)
def_op('BUILD_CLASS', 89)
HAVE_ARGUMENT = 90 # Opcodes from here have an argument:
name_op('STORE_NAME', 90) # Index in name list
name_op('DELETE_NAME', 91) # ""
def_op('UNPACK_SEQUENCE', 92) # Number of tuple items
jrel_op('FOR_ITER', 93)
name_op('STORE_ATTR', 95) # Index in name list
name_op('DELETE_ATTR', 96) # ""
name_op('STORE_GLOBAL', 97) # ""
name_op('DELETE_GLOBAL', 98) # ""
def_op('DUP_TOPX', 99) # number of items to duplicate
def_op('LOAD_CONST', 100) # Index in const list
hasconst.append(100)
name_op('LOAD_NAME', 101) # Index in name list
def_op('BUILD_TUPLE', 102) # Number of tuple items
def_op('BUILD_LIST', 103) # Number of list items
def_op('BUILD_MAP', 104) # Always zero for now
name_op('LOAD_ATTR', 105) # Index in name list
def_op('COMPARE_OP', 106) # Comparison operator
hascompare.append(106)
name_op('IMPORT_NAME', 107) # Index in name list
name_op('IMPORT_FROM', 108) # Index in name list
jrel_op('JUMP_FORWARD', 110) # Number of bytes to skip
jrel_op('JUMP_IF_FALSE', 111) # ""
jrel_op('JUMP_IF_TRUE', 112) # ""
jabs_op('JUMP_ABSOLUTE', 113) # Target byte offset from beginning of code
name_op('LOAD_GLOBAL', 116) # Index in name list
jabs_op('CONTINUE_LOOP', 119) # Target address
jrel_op('SETUP_LOOP', 120) # Distance to target address
jrel_op('SETUP_EXCEPT', 121) # ""
jrel_op('SETUP_FINALLY', 122) # ""
def_op('LOAD_FAST', 124) # Local variable number
haslocal.append(124)
def_op('STORE_FAST', 125) # Local variable number
haslocal.append(125)
def_op('DELETE_FAST', 126) # Local variable number
haslocal.append(126)
def_op('RAISE_VARARGS', 130) # Number of raise arguments (1, 2, or 3)
def_op('CALL_FUNCTION', 131) # #args + (#kwargs << 8)
def_op('MAKE_FUNCTION', 132) # Number of args with default values
def_op('BUILD_SLICE', 133) # Number of items
def_op('MAKE_CLOSURE', 134)
def_op('LOAD_CLOSURE', 135)
hasfree.append(135)
def_op('LOAD_DEREF', 136)
hasfree.append(136)
def_op('STORE_DEREF', 137)
hasfree.append(137)
def_op('CALL_FUNCTION_VAR', 140) # #args + (#kwargs << 8)
def_op('CALL_FUNCTION_KW', 141) # #args + (#kwargs << 8)
def_op('CALL_FUNCTION_VAR_KW', 142) # #args + (#kwargs << 8)
def_op('EXTENDED_ARG', 143)
EXTENDED_ARG = 143
del def_op, name_op, jrel_op, jabs_op

View File

@@ -0,0 +1,185 @@
"""
opcode module - potentially shared between dis and other modules which
operate on bytecodes (e.g. peephole optimizers).
"""
__all__ = ["cmp_op", "hasconst", "hasname", "hasjrel", "hasjabs",
"haslocal", "hascompare", "hasfree", "opname", "opmap",
"HAVE_ARGUMENT", "EXTENDED_ARG"]
cmp_op = ('<', '<=', '==', '!=', '>', '>=', 'in', 'not in', 'is',
'is not', 'exception match', 'BAD')
hasconst = []
hasname = []
hasjrel = []
hasjabs = []
haslocal = []
hascompare = []
hasfree = []
opmap = {}
opname = [''] * 256
for op in range(256): opname[op] = '<%r>' % (op,)
del op
def def_op(name, op):
    # Register an opcode: map number -> mnemonic and mnemonic -> number.
    opname[op] = name
    opmap[name] = op
def name_op(name, op):
    # Opcode whose argument is an index into the co_names list.
    def_op(name, op)
    hasname.append(op)
def jrel_op(name, op):
    # Opcode whose argument is a relative jump target.
    def_op(name, op)
    hasjrel.append(op)
def jabs_op(name, op):
    # Opcode whose argument is an absolute jump target.
    def_op(name, op)
    hasjabs.append(op)
# Instruction opcodes for compiled code
# Blank lines correspond to available opcodes
def_op('STOP_CODE', 0) # 0
def_op('POP_TOP', 1) # 15
def_op('ROT_TWO', 2) # 59
def_op('ROT_THREE', 3) # 60
def_op('DUP_TOP', 4) # 13
def_op('ROT_FOUR', 5) # 49
def_op('NOP', 9) # 53
def_op('UNARY_POSITIVE', 10) # 48
def_op('UNARY_NEGATIVE', 11) # 54
def_op('UNARY_NOT', 12) # 38
def_op('UNARY_CONVERT', 13) # 25
def_op('UNARY_INVERT', 15) # 34
def_op('LIST_APPEND', 18) # 68
def_op('BINARY_POWER', 19) # 28
def_op('BINARY_MULTIPLY', 20) # 36
def_op('BINARY_DIVIDE', 21) # 12
def_op('BINARY_MODULO', 22) # 41
def_op('BINARY_ADD', 23) # 52
def_op('BINARY_SUBTRACT', 24) # 55
def_op('BINARY_SUBSCR', 25) # 4
def_op('BINARY_FLOOR_DIVIDE', 26) # 43
def_op('BINARY_TRUE_DIVIDE', 27) # 5
def_op('INPLACE_FLOOR_DIVIDE', 28) # 32
def_op('INPLACE_TRUE_DIVIDE', 29) # 30
def_op('SLICE+0', 30) # 16
def_op('SLICE+1', 31) # 17
def_op('SLICE+2', 32) # 18
def_op('SLICE+3', 33) # 19
def_op('STORE_SLICE+0', 40) # 61
def_op('STORE_SLICE+1', 41) # 62
def_op('STORE_SLICE+2', 42) # 63
def_op('STORE_SLICE+3', 43) # 64
def_op('DELETE_SLICE+0', 50) # 44
def_op('DELETE_SLICE+1', 51) # 45
def_op('DELETE_SLICE+2', 52) # 46
def_op('DELETE_SLICE+3', 53) # 47
def_op('INPLACE_ADD', 55) # 6
def_op('INPLACE_SUBTRACT', 56) # 29
def_op('INPLACE_MULTIPLY', 57) # 8
def_op('INPLACE_DIVIDE', 58) # 27
def_op('INPLACE_MODULO', 59) # 3
def_op('STORE_SUBSCR', 60) # 31
def_op('DELETE_SUBSCR', 61) # 69
def_op('BINARY_LSHIFT', 62) # 7
def_op('BINARY_RSHIFT', 63) # 22
def_op('BINARY_AND', 64) # 50
def_op('BINARY_XOR', 65) # 21
def_op('BINARY_OR', 66) # 2
def_op('INPLACE_POWER', 67) # 57
def_op('GET_ITER', 68) # 39
def_op('PRINT_EXPR', 70) # 20
def_op('PRINT_ITEM', 71) # 9
def_op('PRINT_NEWLINE', 72) # 14
def_op('PRINT_ITEM_TO', 73) # 33
def_op('PRINT_NEWLINE_TO', 74) # 35
def_op('INPLACE_LSHIFT', 75) # 11
def_op('INPLACE_RSHIFT', 76) # 58
def_op('INPLACE_AND', 77) # 24
def_op('INPLACE_XOR', 78) # 23
def_op('INPLACE_OR', 79) # 10
def_op('BREAK_LOOP', 80) # 40
def_op('WITH_CLEANUP', 81) # 37
def_op('LOAD_LOCALS', 82) # 51
def_op('RETURN_VALUE', 83) # 66
def_op('IMPORT_STAR', 84) # 56
def_op('EXEC_STMT', 85) # 65
def_op('YIELD_VALUE', 86) # 26
def_op('POP_BLOCK', 87) # 1
def_op('END_FINALLY', 88) # 67
def_op('BUILD_CLASS', 89) # 42
HAVE_ARGUMENT = 90 # 70 # Opcodes from here have an argument:
name_op('STORE_NAME', 90) # 95 # Index in name list
name_op('DELETE_NAME', 91) # 94 # ""
def_op('UNPACK_SEQUENCE', 92) # 93 # Number of tuple items
jrel_op('FOR_ITER', 93) # 81
name_op('STORE_ATTR', 95) # 84 # Index in name list
name_op('DELETE_ATTR', 96) # 87 # ""
name_op('STORE_GLOBAL', 97) # 105 # ""
name_op('DELETE_GLOBAL', 98) # 98 # ""
def_op('DUP_TOPX', 99) # 104 # number of items to duplicate
def_op('LOAD_CONST', 100) # 72 # Index in const list
hasconst.append(100) # 72
name_op('LOAD_NAME', 101) # 79 # Index in name list
def_op('BUILD_TUPLE', 102) # 80 # Number of tuple items
def_op('BUILD_LIST', 103) # 107 # Number of list items
def_op('BUILD_MAP', 104) # 78 # Always zero for now
name_op('LOAD_ATTR', 105) # 86 # Index in name list
def_op('COMPARE_OP', 106) # 101 # Comparison operator
hascompare.append(106) # 101
name_op('IMPORT_NAME', 107) # 88 # Index in name list
name_op('IMPORT_FROM', 108) # 89 # Index in name list
jrel_op('JUMP_FORWARD', 110) # 73 # Number of bytes to skip
jabs_op('JUMP_IF_FALSE', 111) # 83 # ""
jabs_op('JUMP_IF_TRUE', 112) # 90 # ""
jabs_op('JUMP_ABSOLUTE', 113) # 103 # Target byte offset from beginning of code
name_op('LOAD_GLOBAL', 116) # 70 # Index in name list
jabs_op('CONTINUE_LOOP', 119) # 96 # Target address
jrel_op('SETUP_LOOP', 120) # 74 # Distance to target address
jrel_op('SETUP_EXCEPT', 121) # 75 # ""
jrel_op('SETUP_FINALLY', 122) # 106 # ""
def_op('LOAD_FAST', 124) # 92 # Local variable number
haslocal.append(124) # 92
def_op('STORE_FAST', 125) # 82 # Local variable number
haslocal.append(125) # 82
def_op('DELETE_FAST', 126) # 71 # Local variable number
haslocal.append(126) # 71
def_op('RAISE_VARARGS', 130) # 91 # Number of raise arguments (1, 2, or 3)
def_op('CALL_FUNCTION', 131) # 102 # #args + (#kwargs << 8)
def_op('MAKE_FUNCTION', 132) # 76 # Number of args with default values
def_op('BUILD_SLICE', 133) # 77 # Number of items
def_op('MAKE_CLOSURE', 134) # 85
def_op('LOAD_CLOSURE', 135) # 97
hasfree.append(135) # 97
def_op('LOAD_DEREF', 136) # 99
hasfree.append(136) # 99
def_op('STORE_DEREF', 137) # 100
hasfree.append(137) # 100
def_op('CALL_FUNCTION_VAR', 140) # 111 # #args + (#kwargs << 8)
def_op('CALL_FUNCTION_KW', 141) # 112 # #args + (#kwargs << 8)
def_op('CALL_FUNCTION_VAR_KW', 142) # 113 # #args + (#kwargs << 8)
def_op('EXTENDED_ARG', 143) # 114
EXTENDED_ARG = 143 # 114
del def_op, name_op, jrel_op, jabs_op

View File

@@ -0,0 +1,186 @@
"""
opcode module - potentially shared between dis and other modules which
operate on bytecodes (e.g. peephole optimizers).
"""
__all__ = ["cmp_op", "hasconst", "hasname", "hasjrel", "hasjabs",
"haslocal", "hascompare", "hasfree", "opname", "opmap",
"HAVE_ARGUMENT", "EXTENDED_ARG"]
cmp_op = ('<', '<=', '==', '!=', '>', '>=', 'in', 'not in', 'is',
'is not', 'exception match', 'BAD')
hasconst = []
hasname = []
hasjrel = []
hasjabs = []
haslocal = []
hascompare = []
hasfree = []
opmap = {}
opname = [''] * 256
for op in range(256): opname[op] = '<%r>' % (op,)
del op
def def_op(name, op):
    # Register an opcode: map number -> mnemonic and mnemonic -> number.
    opname[op] = name
    opmap[name] = op
def name_op(name, op):
    # Opcode whose argument is an index into the co_names list.
    def_op(name, op)
    hasname.append(op)
def jrel_op(name, op):
    # Opcode whose argument is a relative jump target.
    def_op(name, op)
    hasjrel.append(op)
def jabs_op(name, op):
    # Opcode whose argument is an absolute jump target.
    def_op(name, op)
    hasjabs.append(op)
# Instruction opcodes for compiled code
# Blank lines correspond to available opcodes
def_op('STOP_CODE', 0)
def_op('POP_TOP', 1)
def_op('ROT_TWO', 2)
def_op('ROT_THREE', 3)
def_op('DUP_TOP', 4)
def_op('ROT_FOUR', 5)
def_op('NOP', 9)
def_op('UNARY_POSITIVE', 10)
def_op('UNARY_NEGATIVE', 11)
def_op('UNARY_NOT', 12)
def_op('UNARY_CONVERT', 13)
def_op('UNARY_INVERT', 15)
def_op('LIST_APPEND', 18)
def_op('BINARY_POWER', 19)
def_op('BINARY_MULTIPLY', 20)
def_op('BINARY_DIVIDE', 21)
def_op('BINARY_MODULO', 22)
def_op('BINARY_ADD', 23)
def_op('BINARY_SUBTRACT', 24)
def_op('BINARY_SUBSCR', 25)
def_op('BINARY_FLOOR_DIVIDE', 26)
def_op('BINARY_TRUE_DIVIDE', 27)
def_op('INPLACE_FLOOR_DIVIDE', 28)
def_op('INPLACE_TRUE_DIVIDE', 29)
def_op('SLICE+0', 30)
def_op('SLICE+1', 31)
def_op('SLICE+2', 32)
def_op('SLICE+3', 33)
def_op('STORE_SLICE+0', 40)
def_op('STORE_SLICE+1', 41)
def_op('STORE_SLICE+2', 42)
def_op('STORE_SLICE+3', 43)
def_op('DELETE_SLICE+0', 50)
def_op('DELETE_SLICE+1', 51)
def_op('DELETE_SLICE+2', 52)
def_op('DELETE_SLICE+3', 53)
def_op('STORE_MAP', 54)
def_op('INPLACE_ADD', 55)
def_op('INPLACE_SUBTRACT', 56)
def_op('INPLACE_MULTIPLY', 57)
def_op('INPLACE_DIVIDE', 58)
def_op('INPLACE_MODULO', 59)
def_op('STORE_SUBSCR', 60)
def_op('DELETE_SUBSCR', 61)
def_op('BINARY_LSHIFT', 62)
def_op('BINARY_RSHIFT', 63)
def_op('BINARY_AND', 64)
def_op('BINARY_XOR', 65)
def_op('BINARY_OR', 66)
def_op('INPLACE_POWER', 67)
def_op('GET_ITER', 68)
def_op('PRINT_EXPR', 70)
def_op('PRINT_ITEM', 71)
def_op('PRINT_NEWLINE', 72)
def_op('PRINT_ITEM_TO', 73)
def_op('PRINT_NEWLINE_TO', 74)
def_op('INPLACE_LSHIFT', 75)
def_op('INPLACE_RSHIFT', 76)
def_op('INPLACE_AND', 77)
def_op('INPLACE_XOR', 78)
def_op('INPLACE_OR', 79)
def_op('BREAK_LOOP', 80)
def_op('WITH_CLEANUP', 81)
def_op('LOAD_LOCALS', 82)
def_op('RETURN_VALUE', 83)
def_op('IMPORT_STAR', 84)
def_op('EXEC_STMT', 85)
def_op('YIELD_VALUE', 86)
def_op('POP_BLOCK', 87)
def_op('END_FINALLY', 88)
def_op('BUILD_CLASS', 89)
HAVE_ARGUMENT = 90 # Opcodes from here have an argument:
name_op('STORE_NAME', 90) # Index in name list
name_op('DELETE_NAME', 91) # ""
def_op('UNPACK_SEQUENCE', 92) # Number of tuple items
jrel_op('FOR_ITER', 93)
name_op('STORE_ATTR', 95) # Index in name list
name_op('DELETE_ATTR', 96) # ""
name_op('STORE_GLOBAL', 97) # ""
name_op('DELETE_GLOBAL', 98) # ""
def_op('DUP_TOPX', 99) # number of items to duplicate
def_op('LOAD_CONST', 100) # Index in const list
hasconst.append(100)
name_op('LOAD_NAME', 101) # Index in name list
def_op('BUILD_TUPLE', 102) # Number of tuple items
def_op('BUILD_LIST', 103) # Number of list items
def_op('BUILD_MAP', 104) # Number of dict entries (upto 255)
name_op('LOAD_ATTR', 105) # Index in name list
def_op('COMPARE_OP', 106) # Comparison operator
hascompare.append(106)
name_op('IMPORT_NAME', 107) # Index in name list
name_op('IMPORT_FROM', 108) # Index in name list
jrel_op('JUMP_FORWARD', 110) # Number of bytes to skip
jabs_op('JUMP_IF_FALSE', 111) # ""
jabs_op('JUMP_IF_TRUE', 112) # ""
jabs_op('JUMP_ABSOLUTE', 113) # Target byte offset from beginning of code
name_op('LOAD_GLOBAL', 116) # Index in name list
jabs_op('CONTINUE_LOOP', 119) # Target address
jrel_op('SETUP_LOOP', 120) # Distance to target address
jrel_op('SETUP_EXCEPT', 121) # ""
jrel_op('SETUP_FINALLY', 122) # ""
def_op('LOAD_FAST', 124) # Local variable number
haslocal.append(124)
def_op('STORE_FAST', 125) # Local variable number
haslocal.append(125)
def_op('DELETE_FAST', 126) # Local variable number
haslocal.append(126)
def_op('RAISE_VARARGS', 130) # Number of raise arguments (1, 2, or 3)
def_op('CALL_FUNCTION', 131) # #args + (#kwargs << 8)
def_op('MAKE_FUNCTION', 132) # Number of args with default values
def_op('BUILD_SLICE', 133) # Number of items
def_op('MAKE_CLOSURE', 134)
def_op('LOAD_CLOSURE', 135)
hasfree.append(135)
def_op('LOAD_DEREF', 136)
hasfree.append(136)
def_op('STORE_DEREF', 137)
hasfree.append(137)
def_op('CALL_FUNCTION_VAR', 140) # #args + (#kwargs << 8)
def_op('CALL_FUNCTION_KW', 141) # #args + (#kwargs << 8)
def_op('CALL_FUNCTION_VAR_KW', 142) # #args + (#kwargs << 8)
def_op('EXTENDED_ARG', 143)
EXTENDED_ARG = 143
del def_op, name_op, jrel_op, jabs_op

View File

@@ -0,0 +1,192 @@
"""
opcode module - potentially shared between dis and other modules which
operate on bytecodes (e.g. peephole optimizers).
"""
__all__ = ["cmp_op", "hasconst", "hasname", "hasjrel", "hasjabs",
"haslocal", "hascompare", "hasfree", "opname", "opmap",
"HAVE_ARGUMENT", "EXTENDED_ARG"]
cmp_op = ('<', '<=', '==', '!=', '>', '>=', 'in', 'not in', 'is',
'is not', 'exception match', 'BAD')
hasconst = []
hasname = []
hasjrel = []
hasjabs = []
haslocal = []
hascompare = []
hasfree = []
opmap = {}
opname = [''] * 256
for op in range(256): opname[op] = '<%r>' % (op,)
del op
def def_op(name, op):
    # Register an opcode: map number -> mnemonic and mnemonic -> number.
    opname[op] = name
    opmap[name] = op
def name_op(name, op):
    # Opcode whose argument is an index into the co_names list.
    def_op(name, op)
    hasname.append(op)
def jrel_op(name, op):
    # Opcode whose argument is a relative jump target.
    def_op(name, op)
    hasjrel.append(op)
def jabs_op(name, op):
    # Opcode whose argument is an absolute jump target.
    def_op(name, op)
    hasjabs.append(op)
# Instruction opcodes for compiled code
# Blank lines correspond to available opcodes
def_op('STOP_CODE', 0)
def_op('POP_TOP', 1)
def_op('ROT_TWO', 2)
def_op('ROT_THREE', 3)
def_op('DUP_TOP', 4)
def_op('ROT_FOUR', 5)
def_op('NOP', 9)
def_op('UNARY_POSITIVE', 10)
def_op('UNARY_NEGATIVE', 11)
def_op('UNARY_NOT', 12)
def_op('UNARY_CONVERT', 13)
def_op('UNARY_INVERT', 15)
def_op('BINARY_POWER', 19)
def_op('BINARY_MULTIPLY', 20)
def_op('BINARY_DIVIDE', 21)
def_op('BINARY_MODULO', 22)
def_op('BINARY_ADD', 23)
def_op('BINARY_SUBTRACT', 24)
def_op('BINARY_SUBSCR', 25)
def_op('BINARY_FLOOR_DIVIDE', 26)
def_op('BINARY_TRUE_DIVIDE', 27)
def_op('INPLACE_FLOOR_DIVIDE', 28)
def_op('INPLACE_TRUE_DIVIDE', 29)
def_op('SLICE+0', 30)
def_op('SLICE+1', 31)
def_op('SLICE+2', 32)
def_op('SLICE+3', 33)
def_op('STORE_SLICE+0', 40)
def_op('STORE_SLICE+1', 41)
def_op('STORE_SLICE+2', 42)
def_op('STORE_SLICE+3', 43)
def_op('DELETE_SLICE+0', 50)
def_op('DELETE_SLICE+1', 51)
def_op('DELETE_SLICE+2', 52)
def_op('DELETE_SLICE+3', 53)
def_op('STORE_MAP', 54)
def_op('INPLACE_ADD', 55)
def_op('INPLACE_SUBTRACT', 56)
def_op('INPLACE_MULTIPLY', 57)
def_op('INPLACE_DIVIDE', 58)
def_op('INPLACE_MODULO', 59)
def_op('STORE_SUBSCR', 60)
def_op('DELETE_SUBSCR', 61)
def_op('BINARY_LSHIFT', 62)
def_op('BINARY_RSHIFT', 63)
def_op('BINARY_AND', 64)
def_op('BINARY_XOR', 65)
def_op('BINARY_OR', 66)
def_op('INPLACE_POWER', 67)
def_op('GET_ITER', 68)
def_op('PRINT_EXPR', 70)
def_op('PRINT_ITEM', 71)
def_op('PRINT_NEWLINE', 72)
def_op('PRINT_ITEM_TO', 73)
def_op('PRINT_NEWLINE_TO', 74)
def_op('INPLACE_LSHIFT', 75)
def_op('INPLACE_RSHIFT', 76)
def_op('INPLACE_AND', 77)
def_op('INPLACE_XOR', 78)
def_op('INPLACE_OR', 79)
def_op('BREAK_LOOP', 80)
def_op('WITH_CLEANUP', 81)
def_op('LOAD_LOCALS', 82)
def_op('RETURN_VALUE', 83)
def_op('IMPORT_STAR', 84)
def_op('EXEC_STMT', 85)
def_op('YIELD_VALUE', 86)
def_op('POP_BLOCK', 87)
def_op('END_FINALLY', 88)
def_op('BUILD_CLASS', 89)
HAVE_ARGUMENT = 90 # Opcodes from here have an argument:
name_op('STORE_NAME', 90) # Index in name list
name_op('DELETE_NAME', 91) # ""
def_op('UNPACK_SEQUENCE', 92) # Number of tuple items
jrel_op('FOR_ITER', 93)
def_op('LIST_APPEND', 94)
name_op('STORE_ATTR', 95) # Index in name list
name_op('DELETE_ATTR', 96) # ""
name_op('STORE_GLOBAL', 97) # ""
name_op('DELETE_GLOBAL', 98) # ""
def_op('DUP_TOPX', 99) # number of items to duplicate
def_op('LOAD_CONST', 100) # Index in const list
hasconst.append(100)
name_op('LOAD_NAME', 101) # Index in name list
def_op('BUILD_TUPLE', 102) # Number of tuple items
def_op('BUILD_LIST', 103) # Number of list items
def_op('BUILD_SET', 104) # Number of set items
def_op('BUILD_MAP', 105) # Number of dict entries (upto 255)
name_op('LOAD_ATTR', 106) # Index in name list
def_op('COMPARE_OP', 107) # Comparison operator
hascompare.append(107)
name_op('IMPORT_NAME', 108) # Index in name list
name_op('IMPORT_FROM', 109) # Index in name list
jrel_op('JUMP_FORWARD', 110) # Number of bytes to skip
jabs_op('JUMP_IF_FALSE_OR_POP', 111) # Target byte offset from beginning of code
jabs_op('JUMP_IF_TRUE_OR_POP', 112) # ""
jabs_op('JUMP_ABSOLUTE', 113) # ""
jabs_op('POP_JUMP_IF_FALSE', 114) # ""
jabs_op('POP_JUMP_IF_TRUE', 115) # ""
name_op('LOAD_GLOBAL', 116) # Index in name list
jabs_op('CONTINUE_LOOP', 119) # Target address
jrel_op('SETUP_LOOP', 120) # Distance to target address
jrel_op('SETUP_EXCEPT', 121) # ""
jrel_op('SETUP_FINALLY', 122) # ""
def_op('LOAD_FAST', 124) # Local variable number
haslocal.append(124)
def_op('STORE_FAST', 125) # Local variable number
haslocal.append(125)
def_op('DELETE_FAST', 126) # Local variable number
haslocal.append(126)
def_op('RAISE_VARARGS', 130) # Number of raise arguments (1, 2, or 3)
def_op('CALL_FUNCTION', 131) # #args + (#kwargs << 8)
def_op('MAKE_FUNCTION', 132) # Number of args with default values
def_op('BUILD_SLICE', 133) # Number of items
def_op('MAKE_CLOSURE', 134)
def_op('LOAD_CLOSURE', 135)
hasfree.append(135)
def_op('LOAD_DEREF', 136)
hasfree.append(136)
def_op('STORE_DEREF', 137)
hasfree.append(137)
def_op('CALL_FUNCTION_VAR', 140) # #args + (#kwargs << 8)
def_op('CALL_FUNCTION_KW', 141) # #args + (#kwargs << 8)
def_op('CALL_FUNCTION_VAR_KW', 142) # #args + (#kwargs << 8)
jrel_op('SETUP_WITH', 143)
def_op('EXTENDED_ARG', 145)
EXTENDED_ARG = 145
def_op('SET_ADD', 146)
def_op('MAP_ADD', 147)
del def_op, name_op, jrel_op, jabs_op

View File

@@ -0,0 +1,700 @@
# Copyright (c) 1998-2002 John Aycock
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
__version__ = 'SPARK-0.7 (pre-alpha-7) uncompyle trim'
def _namelist(instance):
    """Collect attribute names from *instance*'s class and all of its
    base classes, breadth-first, without duplicates."""
    names = []
    seen = {}
    pending = [instance.__class__]
    for klass in pending:
        # Queue up the bases so the walk covers the whole MRO graph.
        pending.extend(klass.__bases__)
        for attr in klass.__dict__.keys():
            if attr not in seen:
                names.append(attr)
                seen[attr] = 1
    return names
#
# Extracted from GenericParser and made global so that [un]picking works.
#
class _State:
    """One Earley parser state: a numbered set of dotted items with a
    transition table ``T`` and a list of completed items."""
    def __init__(self, stateno, items):
        self.stateno = stateno
        self.items = items
        self.T = []
        self.complete = []
class GenericParser:
    #
    # An Earley parser, as per J. Earley, "An Efficient Context-Free
    # Parsing Algorithm", CACM 13(2), pp. 94-102. Also J. C. Earley,
    # "An Efficient Context-Free Parsing Algorithm", Ph.D. thesis,
    # Carnegie-Mellon University, August 1968. New formulation of
    # the parser according to J. Aycock, "Practical Earley Parsing
    # and the SPARK Toolkit", Ph.D. thesis, University of Victoria,
    # 2001, and J. Aycock and R. N. Horspool, "Practical Earley
    # Parsing", unpublished paper, 2001.
    #
    def __init__(self, start):
        """Collect the p_* rules defined on this instance and augment the
        grammar with a START rule deriving `start`."""
        self.rules = {}        # lhs -> list of (lhs, rhs-tuple) rules
        self.rule2func = {}    # rule -> semantic action callable
        self.rule2name = {}    # rule -> defining method name (sans 'p_')
        self.collectRules()
        self.augment(start)
        self.ruleschanged = 1  # derived tables are rebuilt lazily in parse()
    _NULLABLE = '\e_'          # prefix marking nullable-symbol copies in G_e
    _START = 'START'
    _BOF = '|-'                # beginning-of-input pseudo-token
    #
    # When pickling, take the time to generate the full state machine;
    # some information is then extraneous, too. Unfortunately we
    # can't save the rule2func map.
    #
    def __getstate__(self):
        """Pickle support: force-build the complete automaton, then drop
        members that are unpicklable or recomputable on load."""
        if self.ruleschanged:
            #
            # XXX - duplicated from parse()
            #
            self.computeNull()
            self.newrules = {}
            self.new2old = {}
            self.makeNewRules()
            self.ruleschanged = 0
            self.edges, self.cores = {}, {}
            self.states = { 0: self.makeState0() }
            self.makeState(0, self._BOF)
        #
        # XXX - should find a better way to do this..
        #
        changes = 1
        while changes:
            changes = 0
            for k, v in self.edges.items():
                if v is None:
                    state, sym = k
                    if self.states.has_key(state):
                        self.goto(state, sym)
                        changes = 1
        rv = self.__dict__.copy()
        for s in self.states.values():
            del s.items
        del rv['rule2func']
        del rv['nullable']
        del rv['cores']
        return rv
    def __setstate__(self, D):
        """Unpickle support: rebuild the rule tables (which cannot be
        pickled because they hold bound methods), then adopt D."""
        self.rules = {}
        self.rule2func = {}
        self.rule2name = {}
        self.collectRules()
        start = D['rules'][self._START][0][1][1] # Blech.
        self.augment(start)
        D['rule2func'] = self.rule2func
        # The state machine is complete, so the fast makeSet can be used.
        D['makeSet'] = self.makeSet_fast
        self.__dict__ = D
    #
    # A hook for GenericASTBuilder and GenericASTMatcher. Mess
    # thee not with this; nor shall thee toucheth the _preprocess
    # argument to addRule.
    #
    def preprocess(self, rule, func): return rule, func
    def addRule(self, doc, func, _preprocess=1):
        """Add one or more rules from a docstring of the form
        'lhs ::= sym sym ...'; `func` becomes the semantic action."""
        fn = func
        rules = doc.split()
        index = []
        # Each '::=' marks the start of a new rule; record lhs positions.
        for i in xrange(len(rules)):
            if rules[i] == '::=':
                index.append(i-1)
        index.append(len(rules))
        for i in xrange(len(index)-1):
            lhs = rules[index[i]]
            rhs = rules[index[i]+2:index[i+1]]
            rule = (lhs, tuple(rhs))
            if _preprocess:
                rule, fn = self.preprocess(rule, func)
            if self.rules.has_key(lhs):
                self.rules[lhs].append(rule)
            else:
                self.rules[lhs] = [ rule ]
            self.rule2func[rule] = fn
            self.rule2name[rule] = func.__name__[2:]
        self.ruleschanged = 1
    def collectRules(self):
        """Harvest grammar rules from every p_* method's docstring."""
        for name in _namelist(self):
            if name[:2] == 'p_':
                func = getattr(self, name)
                doc = func.__doc__
                self.addRule(doc, func)
    def augment(self, start):
        """Add the rule START ::= |- start, whose action unwraps args[1]."""
        rule = '%s ::= %s %s' % (self._START, self._BOF, start)
        self.addRule(rule, lambda args: args[1], 0)
    def computeNull(self):
        """Compute self.nullable: lhs -> 1 if lhs can derive epsilon."""
        self.nullable = {}
        tbd = []
        for rulelist in self.rules.values():
            lhs = rulelist[0][0]
            self.nullable[lhs] = 0
            for rule in rulelist:
                rhs = rule[1]
                if len(rhs) == 0:
                    self.nullable[lhs] = 1
                    continue
                #
                # We only need to consider rules which
                # consist entirely of nonterminal symbols.
                # This should be a savings on typical
                # grammars.
                #
                for sym in rhs:
                    if not self.rules.has_key(sym):
                        break
                else:
                    tbd.append(rule)
        # Iterate to a fixed point over the candidate rules.
        changes = 1
        while changes:
            changes = 0
            for lhs, rhs in tbd:
                if self.nullable[lhs]:
                    continue
                for sym in rhs:
                    if not self.nullable[sym]:
                        break
                else:
                    self.nullable[lhs] = 1
                    changes = 1
    def makeState0(self):
        """Build the initial automaton state from the START rules."""
        s0 = _State(0, [])
        for rule in self.newrules[self._START]:
            s0.items.append((rule, 0))
        return s0
    def finalState(self, tokens):
        """Return the accepting state number for this token stream."""
        #
        # Yuck.
        #
        if len(self.newrules[self._START]) == 2 and len(tokens) == 0:
            return 1
        start = self.rules[self._START][0][1][1]
        return self.goto(1, start)
    def makeNewRules(self):
        """Build G_e: duplicate rules with nullable symbols marked with
        the _NULLABLE prefix (Aycock/Horspool transformation)."""
        worklist = []
        for rulelist in self.rules.values():
            for rule in rulelist:
                worklist.append((rule, 0, 1, rule))
        for rule, i, candidate, oldrule in worklist:
            lhs, rhs = rule
            n = len(rhs)
            while i < n:
                sym = rhs[i]
                if not self.rules.has_key(sym) or \
                   not self.nullable[sym]:
                    candidate = 0
                    i = i + 1
                    continue
                # Fork a variant where this nullable symbol is marked.
                newrhs = list(rhs)
                newrhs[i] = self._NULLABLE+sym
                newrule = (lhs, tuple(newrhs))
                worklist.append((newrule, i+1,
                                 candidate, oldrule))
                candidate = 0
                i = i + 1
            else:
                if candidate:
                    lhs = self._NULLABLE+lhs
                    rule = (lhs, rhs)
                if self.newrules.has_key(lhs):
                    self.newrules[lhs].append(rule)
                else:
                    self.newrules[lhs] = [ rule ]
                self.new2old[rule] = oldrule
    def typestring(self, token):
        # Subclasses may return a string type tag to enable the faster
        # typed dispatch in makeSet(); None disables it.
        return None
    def error(self, token):
        """Default error handler: report the offending token and exit."""
        print "Syntax error at or near `%s' token" % token
        raise SystemExit
    def parse(self, tokens):
        """Parse `tokens` and return the semantic value of the start
        symbol; calls self.error() on a syntax error."""
        sets = [ [(1,0), (2,0)] ]
        self.links = {}
        # Rebuild the derived tables if the grammar changed.
        if self.ruleschanged:
            self.computeNull()
            self.newrules = {}
            self.new2old = {}
            self.makeNewRules()
            self.ruleschanged = 0
            self.edges, self.cores = {}, {}
            self.states = { 0: self.makeState0() }
            self.makeState(0, self._BOF)
        for i in xrange(len(tokens)):
            sets.append([])
            if sets[i] == []:
                break
            self.makeSet(tokens[i], sets, i)
        else:
            sets.append([])
            self.makeSet(None, sets, len(tokens))
        finalitem = (self.finalState(tokens), 0)
        if finalitem not in sets[-2]:
            if len(tokens) > 0:
                self.error(tokens[i-1])
            else:
                self.error(None)
        return self.buildTree(self._START, finalitem,
                              tokens, len(sets)-2)
    def isnullable(self, sym):
        #
        # For symbols in G_e only. If we weren't supporting 1.5,
        # could just use sym.startswith().
        #
        return self._NULLABLE == sym[0:len(self._NULLABLE)]
    def skip(self, (lhs, rhs), pos=0):
        """Advance `pos` past any leading nullable symbols of rhs."""
        n = len(rhs)
        while pos < n:
            if not self.isnullable(rhs[pos]):
                break
            pos = pos + 1
        return pos
    def makeState(self, state, sym):
        """Create (or find) the automaton state reached from `state` on
        `sym`; returns the kernel state number."""
        assert sym is not None
        #
        # Compute \epsilon-kernel state's core and see if
        # it exists already.
        #
        kitems = []
        for rule, pos in self.states[state].items:
            lhs, rhs = rule
            if rhs[pos:pos+1] == (sym,):
                kitems.append((rule, self.skip(rule, pos+1)))
        tcore = tuple(sorted(kitems))
        if self.cores.has_key(tcore):
            return self.cores[tcore]
        #
        # Nope, doesn't exist. Compute it and the associated
        # \epsilon-nonkernel state together; we'll need it right away.
        #
        k = self.cores[tcore] = len(self.states)
        K, NK = _State(k, kitems), _State(k+1, [])
        self.states[k] = K
        predicted = {}
        edges = self.edges
        rules = self.newrules
        for X in K, NK:
            worklist = X.items
            for item in worklist:
                rule, pos = item
                lhs, rhs = rule
                if pos == len(rhs):
                    X.complete.append(rule)
                    continue
                nextSym = rhs[pos]
                key = (X.stateno, nextSym)
                if not rules.has_key(nextSym):
                    # nextSym is a terminal: record a pending edge.
                    if not edges.has_key(key):
                        edges[key] = None
                        X.T.append(nextSym)
                else:
                    # nextSym is a nonterminal: predict its rules into NK.
                    edges[key] = None
                    if not predicted.has_key(nextSym):
                        predicted[nextSym] = 1
                        for prule in rules[nextSym]:
                            ppos = self.skip(prule)
                            new = (prule, ppos)
                            NK.items.append(new)
            #
            # Problem: we know K needs generating, but we
            # don't yet know about NK. Can't commit anything
            # regarding NK to self.edges until we're sure. Should
            # we delay committing on both K and NK to avoid this
            # hacky code? This creates other problems..
            #
            if X is K:
                edges = {}
        if NK.items == []:
            return k
        #
        # Check for \epsilon-nonkernel's core. Unfortunately we
        # need to know the entire set of predicted nonterminals
        # to do this without accidentally duplicating states.
        #
        tcore = tuple(sorted(predicted.keys()))
        if self.cores.has_key(tcore):
            self.edges[(k, None)] = self.cores[tcore]
            return k
        nk = self.cores[tcore] = self.edges[(k, None)] = NK.stateno
        self.edges.update(edges)
        self.states[nk] = NK
        return k
    def goto(self, state, sym):
        """Return the state reached from `state` on `sym`, generating it
        on demand; None if there is no transition."""
        key = (state, sym)
        if not self.edges.has_key(key):
            #
            # No transitions from state on sym.
            #
            return None
        rv = self.edges[key]
        if rv is None:
            #
            # Target state isn't generated yet. Remedy this.
            #
            rv = self.makeState(state, sym)
            self.edges[key] = rv
        return rv
    def gotoT(self, state, t):
        # Transition on a typed terminal: at most one target state.
        return [self.goto(state, t)]
    def gotoST(self, state, st):
        # Transition on an untyped token: try every terminal of the state.
        rv = []
        for t in self.states[state].T:
            if st == t:
                rv.append(self.goto(state, t))
        return rv
    def add(self, set, item, i=None, predecessor=None, causal=None):
        """Add an Earley item to `set`, recording the derivation link
        (predecessor, causal) when given, for later tree building."""
        if predecessor is None:
            if item not in set:
                set.append(item)
        else:
            key = (item, i)
            if item not in set:
                self.links[key] = []
                set.append(item)
            self.links[key].append((predecessor, causal))
    def makeSet(self, token, sets, i):
        """Compute Earley set i+1 (scan) and extend set i (complete)."""
        cur, next = sets[i], sets[i+1]
        ttype = token is not None and self.typestring(token) or None
        if ttype is not None:
            fn, arg = self.gotoT, ttype
        else:
            fn, arg = self.gotoST, token
        for item in cur:
            ptr = (item, i)
            state, parent = item
            # Scanner step: shift the token into the next set.
            add = fn(state, arg)
            for k in add:
                if k is not None:
                    self.add(next, (k, parent), i+1, ptr)
                    nk = self.goto(k, None)
                    if nk is not None:
                        self.add(next, (nk, i+1))
            if parent == i:
                continue
            # Completer step: propagate completed rules to parent items.
            for rule in self.states[state].complete:
                lhs, rhs = rule
                for pitem in sets[parent]:
                    pstate, pparent = pitem
                    k = self.goto(pstate, lhs)
                    if k is not None:
                        why = (item, i, rule)
                        pptr = (pitem, parent)
                        self.add(cur, (k, pparent),
                                 i, pptr, why)
                        nk = self.goto(k, None)
                        if nk is not None:
                            self.add(cur, (nk, i))
    def makeSet_fast(self, token, sets, i):
        #
        # Call *only* when the entire state machine has been built!
        # It relies on self.edges being filled in completely, and
        # then duplicates and inlines code to boost speed at the
        # cost of extreme ugliness.
        #
        cur, next = sets[i], sets[i+1]
        ttype = token is not None and self.typestring(token) or None
        for item in cur:
            ptr = (item, i)
            state, parent = item
            if ttype is not None:
                k = self.edges.get((state, ttype), None)
                if k is not None:
                    #self.add(next, (k, parent), i+1, ptr)
                    #INLINED --v
                    new = (k, parent)
                    key = (new, i+1)
                    if new not in next:
                        self.links[key] = []
                        next.append(new)
                    self.links[key].append((ptr, None))
                    #INLINED --^
                    #nk = self.goto(k, None)
                    nk = self.edges.get((k, None), None)
                    if nk is not None:
                        #self.add(next, (nk, i+1))
                        #INLINED --v
                        new = (nk, i+1)
                        if new not in next:
                            next.append(new)
                        #INLINED --^
            else:
                add = self.gotoST(state, token)
                for k in add:
                    if k is not None:
                        self.add(next, (k, parent), i+1, ptr)
                        #nk = self.goto(k, None)
                        nk = self.edges.get((k, None), None)
                        if nk is not None:
                            self.add(next, (nk, i+1))
            if parent == i:
                continue
            for rule in self.states[state].complete:
                lhs, rhs = rule
                for pitem in sets[parent]:
                    pstate, pparent = pitem
                    #k = self.goto(pstate, lhs)
                    k = self.edges.get((pstate, lhs), None)
                    if k is not None:
                        why = (item, i, rule)
                        pptr = (pitem, parent)
                        #self.add(cur, (k, pparent),
                        #	 i, pptr, why)
                        #INLINED --v
                        new = (k, pparent)
                        key = (new, i)
                        if new not in cur:
                            self.links[key] = []
                            cur.append(new)
                        self.links[key].append((pptr, why))
                        #INLINED --^
                        #nk = self.goto(k, None)
                        nk = self.edges.get((k, None), None)
                        if nk is not None:
                            #self.add(cur, (nk, i))
                            #INLINED --v
                            new = (nk, i)
                            if new not in cur:
                                cur.append(new)
                            #INLINED --^
    def predecessor(self, key, causal):
        """Return the predecessor item recorded for `key` with this cause."""
        for p, c in self.links[key]:
            if c == causal:
                return p
        assert 0
    def causal(self, key):
        """Return the cause of `key`, resolving ambiguity if necessary."""
        links = self.links[key]
        if len(links) == 1:
            return links[0][1]
        choices = []
        rule2cause = {}
        for p, c in links:
            rule = c[2]
            choices.append(rule)
            rule2cause[rule] = c
        return rule2cause[self.ambiguity(choices)]
    def deriveEpsilon(self, nt):
        """Build the semantic value of a nonterminal deriving epsilon."""
        if len(self.newrules[nt]) > 1:
            rule = self.ambiguity(self.newrules[nt])
        else:
            rule = self.newrules[nt][0]
        #print rule
        rhs = rule[1]
        attr = [None] * len(rhs)
        for i in xrange(len(rhs)-1, -1, -1):
            attr[i] = self.deriveEpsilon(rhs[i])
        return self.rule2func[self.new2old[rule]](attr)
    def buildTree(self, nt, item, tokens, k):
        """Walk the derivation links backwards from `item` and invoke the
        semantic actions, producing the parse result for `nt`."""
        state, parent = item
        choices = []
        for rule in self.states[state].complete:
            if rule[0] == nt:
                choices.append(rule)
        rule = choices[0]
        if len(choices) > 1:
            rule = self.ambiguity(choices)
        #print rule
        rhs = rule[1]
        attr = [None] * len(rhs)
        # Fill attributes right-to-left, following predecessor links.
        for i in xrange(len(rhs)-1, -1, -1):
            sym = rhs[i]
            if not self.newrules.has_key(sym):
                if sym != self._BOF:
                    attr[i] = tokens[k-1]
                    key = (item, k)
                    item, k = self.predecessor(key, None)
            #elif self.isnullable(sym):
            elif self._NULLABLE == sym[0:len(self._NULLABLE)]:
                attr[i] = self.deriveEpsilon(sym)
            else:
                key = (item, k)
                why = self.causal(key)
                attr[i] = self.buildTree(sym, why[0],
                                         tokens, why[1])
                item, k = self.predecessor(key, why)
        return self.rule2func[self.new2old[rule]](attr)
    def ambiguity(self, rules):
        #
        # XXX - problem here and in collectRules() if the same rule
        # appears in >1 method. Also undefined results if rules
        # causing the ambiguity appear in the same method.
        #
        sortlist = []
        name2index = {}
        for i in xrange(len(rules)):
            lhs, rhs = rule = rules[i]
            name = self.rule2name[self.new2old[rule]]
            sortlist.append((len(rhs), name))
            name2index[name] = i
        sortlist.sort()
        list = map(lambda (a,b): b, sortlist)
        return rules[name2index[self.resolve(list)]]
    def resolve(self, list):
        #
        # Resolve ambiguity in favor of the shortest RHS.
        # Since we walk the tree from the top down, this
        # should effectively resolve in favor of a "shift".
        #
        return list[0]
#
# GenericASTBuilder automagically constructs a concrete/abstract syntax tree
# for a given input. The extra argument is a class (not an instance!)
# which supports the "__setslice__" and "__len__" methods.
#
# XXX - silently overrides any user code in methods.
#
class GenericASTBuilder(GenericParser):
    def __init__(self, AST, start):
        """AST is the node class (not an instance!) used for tree nodes."""
        GenericParser.__init__(self, start)
        self.AST = AST
    def preprocess(self, rule, func):
        # Replace every rule's semantic action with one that builds an
        # AST node for the rule's lhs (user actions are ignored).
        rebind = lambda lhs, self=self: \
                lambda args, lhs=lhs, self=self: \
                    self.buildASTNode(args, lhs)
        lhs, rhs = rule
        return rule, rebind(lhs)
    def buildASTNode(self, args, lhs):
        """Collect the children for an `lhs` node: AST nodes pass through
        unchanged, anything else goes through the terminal() hook."""
        children = []
        for arg in args:
            if isinstance(arg, self.AST):
                children.append(arg)
            else:
                # Was a no-op duplicate of the branch above; route tokens
                # through terminal() so subclasses can convert leaves.
                children.append(self.terminal(arg))
        return self.nonterminal(lhs, children)
    def terminal(self, token):
        """Hook for subclasses to wrap tokens into leaf nodes.

        The default is the identity, which preserves the previous
        behavior where tokens were appended unchanged."""
        return token
    def nonterminal(self, type, args):
        """Create an AST node of the given type with `args` as children."""
        rv = self.AST(type)
        rv[:len(args)] = args
        return rv
#
# GenericASTTraversal is a Visitor pattern according to Design Patterns. For
# each node it attempts to invoke the method n_<node type>, falling
# back onto the default() method if the n_* can't be found. The preorder
# traversal also looks for an exit hook named n_<node type>_exit (no default
# routine is called if it's not found). To prematurely halt traversal
# of a subtree, call the prune() method -- this only makes sense for a
# preorder traversal. Node type is determined via the typestring() method.
#
class GenericASTTraversalPruningException(Exception):
    # Raised by GenericASTTraversal.prune() to stop descending into the
    # current subtree.  Derives from Exception (instead of being a bare
    # old-style class) so that `raise` remains legal on modern Pythons;
    # existing `except GenericASTTraversalPruningException` clauses are
    # unaffected.
    pass
class GenericASTTraversal:
    """Visitor-pattern traversal over an AST (see the module comment).

    For each node the visitor tries n_<node type>, falling back to
    default(); preorder() also invokes n_<node type>_exit after the
    children.  Call prune() inside a handler to skip the subtree.
    """
    def __init__(self, ast):
        self.ast = ast
    def typestring(self, node):
        """Return the tag used to select the visitor method for `node`."""
        return node.type
    def prune(self):
        """Abort traversal of the current subtree (preorder only)."""
        raise GenericASTTraversalPruningException
    def preorder(self, node=None):
        """Visit `node` (default: the root), then its children."""
        if node is None:
            node = self.ast
        try:
            name = 'n_' + self.typestring(node)
            handler = getattr(self, name, None)
            if handler is not None:
                handler(node)
            else:
                self.default(node)
        except GenericASTTraversalPruningException:
            return
        for child in node:
            self.preorder(child)
        exit_hook = getattr(self, name + '_exit', None)
        if exit_hook is not None:
            exit_hook(node)
    def default(self, node):
        """Fallback visitor: does nothing."""
        pass

View File

@@ -0,0 +1,335 @@
#
# (C) Copyright 2000-2002 by hartmut Goebel <hartmut@goebel.noris.de>
#
# byte-code verifier for uncompyle
#
import types
import operator
import dis
import uncompyle2, Scanner
# Map binary-opcode names to the operator they perform; used by
# cmp_code_objects() to fold constant LOAD_CONST pairs when the compiler
# pre-computed the result.
BIN_OP_FUNCS = {
'BINARY_POWER': operator.pow,
'BINARY_MULTIPLY': operator.mul,
'BINARY_DIVIDE': operator.div,
'BINARY_FLOOR_DIVIDE': operator.floordiv,
'BINARY_TRUE_DIVIDE': operator.truediv,
'BINARY_MODULO' : operator.mod,
'BINARY_ADD': operator.add,
# Fixed typo: was 'BINARY_SUBRACT', which is not a real opcode name
# (see the dis module), so subtraction was silently never folded.
'BINARY_SUBTRACT': operator.sub,
'BINARY_LSHIFT': operator.lshift,
'BINARY_RSHIFT': operator.rshift,
'BINARY_AND': operator.and_,
'BINARY_XOR': operator.xor,
'BINARY_OR': operator.or_,
}
# Set by cmp_code_objects() from the scanner for the bytecode version.
JUMP_OPs = None
#--- exceptions ---
class VerifyCmpError(Exception):
    """Base class for all byte-code comparison failures in this module."""
    pass
class CmpErrorConsts(VerifyCmpError):
    """Exception to be raised when consts differ."""
    def __init__(self, name, index):
        self.name = name
        self.index = index
    def __str__(self):
        template = 'Compare Error within Consts of %s at index %i'
        return template % (repr(self.name), self.index)
class CmpErrorConstsType(VerifyCmpError):
    """Exception to be raised when consts differ."""
    def __init__(self, name, index):
        self.name = name
        self.index = index
    def __str__(self):
        return ('Consts type differ in %s at index %i'
                % (repr(self.name), self.index))
class CmpErrorConstsLen(VerifyCmpError):
    """Exception to be raised when length of co_consts differs."""
    def __init__(self, name, consts1, consts2):
        self.name = name
        self.consts = (consts1, consts2)
    def __str__(self):
        # repr() replaces the deprecated backquote syntax (`x`), which is
        # a synonym for repr() and was removed in later Python versions;
        # the output is byte-for-byte identical.
        return 'Consts length differs in %s:\n\n%i:\t%s\n\n%i:\t%s\n\n' % \
               (repr(self.name),
                len(self.consts[0]), repr(self.consts[0]),
                len(self.consts[1]), repr(self.consts[1]))
class CmpErrorCode(VerifyCmpError):
    """Exception to be raised when code differs."""
    def __init__(self, name, index, token1, token2, tokens1, tokens2):
        self.name = name
        self.index = index      # offset of the first differing token
        self.token1 = token1
        self.token2 = token2
        self.tokens = [tokens1, tokens2]
    def __str__(self):
        # Render both token streams side by side.  NOTE: two-sequence
        # map() here is the Python 2 form that pads the shorter list
        # with None (unlike zip(), which truncates).
        s = reduce(lambda s,t: "%s%-37s\t%-37s\n" % (s, t[0], t[1]),
                   map(lambda a,b: (a,b),
                       self.tokens[0],
                       self.tokens[1]),
                   'Code differs in %s\n' % str(self.name))
        return ('Code differs in %s at offset %s [%s] != [%s]\n\n' % \
                (repr(self.name), self.index,
                 repr(self.token1), repr(self.token2))) + s
class CmpErrorCodeLen(VerifyCmpError):
    """Exception to be raised when code length differs."""
    def __init__(self, name, tokens1, tokens2):
        self.name = name
        self.tokens = [tokens1, tokens2]
    def __str__(self):
        # Side-by-side dump of both streams; the two-sequence map() pads
        # the shorter token list with None (Python 2 semantics).
        return reduce(lambda s,t: "%s%-37s\t%-37s\n" % (s, t[0], t[1]),
                      map(lambda a,b: (a,b),
                          self.tokens[0],
                          self.tokens[1]),
                      'Code len differs in %s\n' % str(self.name))
class CmpErrorMember(VerifyCmpError):
    """Exception to be raised when other members differ."""
    def __init__(self, name, member, data1, data2):
        self.name = name
        self.member = member
        self.data = (data1, data2)
    def __str__(self):
        template = 'Member %s differs in %s:\n\t%s\n\t%s\n'
        details = (repr(self.member), repr(self.name),
                   repr(self.data[0]), repr(self.data[1]))
        return template % details
#--- compare ---
# these members are ignored
# (they may legitimately differ between original and recompiled code,
# e.g. line numbers and the source file name)
__IGNORE_CODE_MEMBERS__ = ['co_filename', 'co_firstlineno', 'co_lnotab', 'co_stacksize', 'co_names']
def cmp_code_objects(version, code_obj1, code_obj2, name=''):
    """
    Compare two code-objects.
    This is the main part of this module.

    Raises a CmpError* subclass on the first difference found; returns
    None if the objects compare equal (modulo the allowed asymmetries
    handled below, e.g. optimizer artifacts).
    """
    #print code_obj1, type(code_obj2)
    assert type(code_obj1) == types.CodeType
    assert type(code_obj2) == types.CodeType
    #print dir(code_obj1)
    if isinstance(code_obj1, object):
        # new style classes (Python 2.2)
        # assume _both_ code objects to be new stle classes
        assert dir(code_obj1) == dir(code_obj2)
    else:
        # old style classes
        assert dir(code_obj1) == code_obj1.__members__
        assert dir(code_obj2) == code_obj2.__members__
        assert code_obj1.__members__ == code_obj2.__members__
    # Build a dotted name for error messages.
    if name == '__main__':
        name = code_obj1.co_name
    else:
        name = '%s.%s' % (name, code_obj1.co_name)
        if name == '.?': name = '__main__'
    if isinstance(code_obj1, object) and cmp(code_obj1, code_obj2):
        # use the new style code-classes' __cmp__ method, which
        # should be faster and more sophisticated
        # if this compare fails, we use the old routine to
        # find out, what exactly is nor equal
        # if this compare succeds, simply return
        #return
        pass
    if isinstance(code_obj1, object):
        members = filter(lambda x: x.startswith('co_'), dir(code_obj1))
    else:
        members = dir(code_obj1);
    members.sort();  #members.reverse()
    tokens1 = None
    for member in members:
        if member in __IGNORE_CODE_MEMBERS__:
            pass
        elif member == 'co_code':
            # Compare disassembled token streams rather than raw bytes so
            # that equivalent-but-reordered code still matches.
            scanner = Scanner.getscanner(version)
            scanner.setShowAsm( showasm=0 )
            global JUMP_OPs
            JUMP_OPs = scanner.JUMP_OPs + ['JUMP_BACK']
            # use changed Token class
            # we (re)set this here to save exception handling,
            # which would get 'unubersichtlich'
            scanner.setTokenClass(Token)
            try:
                # disassemble both code-objects
                tokens1,customize = scanner.disassemble(code_obj1)
                del customize # save memory
                tokens2,customize = scanner.disassemble(code_obj2)
                del customize # save memory
            finally:
                scanner.resetTokenClass() # restore Token class
            targets1 = dis.findlabels(code_obj1.co_code)
            tokens1 = [t for t in tokens1 if t.type != 'COME_FROM']
            tokens2 = [t for t in tokens2 if t.type != 'COME_FROM']
            i1 = 0; i2 = 0
            # offset_map: offsets in stream 1 -> offsets in stream 2;
            # check_jumps: forward-jump targets awaiting verification.
            offset_map = {}; check_jumps = {}
            while i1 < len(tokens1):
                if i2 >= len(tokens2):
                    # Allow a trailing 'LOAD_CONST None; RETURN_VALUE'
                    # pair that the compiler appends redundantly.
                    if len(tokens1) == len(tokens2) + 2 \
                          and tokens1[-1].type == 'RETURN_VALUE' \
                          and tokens1[-2].type == 'LOAD_CONST' \
                          and tokens1[-2].pattr == None \
                          and tokens1[-3].type == 'RETURN_VALUE':
                        break
                    else:
                        raise CmpErrorCodeLen(name, tokens1, tokens2)
                offset_map[tokens1[i1].offset] = tokens2[i2].offset
                # Verify any forward jumps that targeted this offset.
                for idx1, idx2, offset2 in check_jumps.get(tokens1[i1].offset, []):
                    if offset2 != tokens2[i2].offset:
                        raise CmpErrorCode(name, tokens1[idx1].offset, tokens1[idx1],
                                   tokens2[idx2], tokens1, tokens2)
                if tokens1[i1] != tokens2[i2]:
                    # Tokens differ: try the known compiler-optimization
                    # patterns before declaring a mismatch.
                    if tokens1[i1].type == 'LOAD_CONST' == tokens2[i2].type:
                        i = 1
                        while tokens1[i1+i].type == 'LOAD_CONST':
                            i += 1
                        # N constants folded into one BUILD_TUPLE/LIST const.
                        if tokens1[i1+i].type.startswith(('BUILD_TUPLE', 'BUILD_LIST')) \
                              and i == int(tokens1[i1+i].type.split('_')[-1]):
                            t = tuple([ elem.pattr for elem in tokens1[i1:i1+i] ])
                            if t != tokens2[i2].pattr:
                                raise CmpErrorCode(name, tokens1[i1].offset, tokens1[i1],
                                           tokens2[i2], tokens1, tokens2)
                            i1 += i + 1
                            i2 += 1
                            continue
                        # 'a, b = b, a' compiled as ROT_TWO vs UNPACK_SEQUENCE_2.
                        elif i == 2 and tokens1[i1+i].type == 'ROT_TWO' and tokens2[i2+1].type == 'UNPACK_SEQUENCE_2':
                            i1 += 3
                            i2 += 2
                            continue
                        # Constant binary expression pre-computed by the compiler.
                        elif i == 2 and tokens1[i1+i].type in BIN_OP_FUNCS:
                            f = BIN_OP_FUNCS[tokens1[i1+i].type]
                            if f(tokens1[i1].pattr, tokens1[i1+1].pattr) == tokens2[i2].pattr:
                                i1 += 3
                                i2 += 1
                                continue
                    # 'not x' + conditional jump folded into the inverse jump.
                    elif tokens1[i1].type == 'UNARY_NOT':
                        if tokens2[i2].type == 'POP_JUMP_IF_TRUE':
                            if tokens1[i1+1].type == 'POP_JUMP_IF_FALSE':
                                i1 += 2
                                i2 += 1
                                continue
                        elif tokens2[i2].type == 'POP_JUMP_IF_FALSE':
                            if tokens1[i1+1].type == 'POP_JUMP_IF_TRUE':
                                i1 += 2
                                i2 += 1
                                continue
                    # Dead jump after a return that nothing targets.
                    elif tokens1[i1].type in ('JUMP_FORWARD', 'JUMP_BACK') \
                          and tokens1[i1-1].type == 'RETURN_VALUE' \
                          and tokens2[i2-1].type in ('RETURN_VALUE', 'RETURN_END_IF') \
                          and int(tokens1[i1].offset) not in targets1:
                        i1 += 1
                        continue
                    # JUMP_FORWARD to the very next instruction vs JUMP_BACK.
                    elif tokens1[i1].type == 'JUMP_FORWARD' and tokens2[i2].type == 'JUMP_BACK' \
                          and tokens1[i1+1].type == 'JUMP_BACK' and tokens2[i2+1].type == 'JUMP_BACK' \
                          and int(tokens1[i1].pattr) == int(tokens1[i1].offset) + 3:
                        if int(tokens1[i1].pattr) == int(tokens1[i1+1].offset):
                            i1 += 2
                            i2 += 2
                            continue
                    raise CmpErrorCode(name, tokens1[i1].offset, tokens1[i1],
                               tokens2[i2], tokens1, tokens2)
                elif tokens1[i1].type in JUMP_OPs and tokens1[i1].pattr != tokens2[i2].pattr:
                    # Same opcode but different jump target: map backward
                    # jumps immediately, defer forward jumps for later.
                    dest1 = int(tokens1[i1].pattr)
                    dest2 = int(tokens2[i2].pattr)
                    if tokens1[i1].type == 'JUMP_BACK':
                        if offset_map[dest1] != dest2:
                            raise CmpErrorCode(name, tokens1[i1].offset, tokens1[i1],
                                       tokens2[i2], tokens1, tokens2)
                    else:
                        #import pdb; pdb.set_trace()
                        if dest1 in check_jumps:
                            check_jumps[dest1].append((i1,i2,dest2))
                        else:
                            check_jumps[dest1] = [(i1,i2,dest2)]
                i1 += 1
                i2 += 1
            del tokens1, tokens2 # save memory
        elif member == 'co_consts':
            # partial optimization can make the co_consts look different,
            #   so we'll just compare the code consts
            codes1 = ( c for c in code_obj1.co_consts if type(c) == types.CodeType )
            codes2 = ( c for c in code_obj2.co_consts if type(c) == types.CodeType )
            for c1, c2 in zip(codes1, codes2):
                cmp_code_objects(version, c1, c2, name=name)
        else:
            # all other members must be equal
            if getattr(code_obj1, member) != getattr(code_obj2, member):
                raise CmpErrorMember(name, member,
                             getattr(code_obj1,member),
                             getattr(code_obj2,member))
class Token(Scanner.Token):
    """Token class with changed semantics for 'cmp()'.

    Two tokens are considered equal when they are literally equal OR
    when they differ only in ways the decompile/recompile round trip is
    allowed to introduce (None vs 'None', empty-tuple constants, jump
    offsets, RETURN_END_IF, ...).
    """
    def __cmp__(self, o):
        t = self.type # shortcut
        loads = ('LOAD_NAME', 'LOAD_GLOBAL', 'LOAD_CONST')
        if t in loads and o.type in loads:
            # The name 'None' and the constant None compare equal.
            if self.pattr == 'None' and o.pattr == None:
                return 0
        if t == 'BUILD_TUPLE_0' and o.type == 'LOAD_CONST' and o.pattr == ():
            return 0
        if t == 'COME_FROM' == o.type:
            return 0
        if t == 'PRINT_ITEM_CONT' and o.type == 'PRINT_ITEM':
            return 0
        if t == 'RETURN_VALUE' and o.type == 'RETURN_END_IF':
            return 0
        if t == 'JUMP_IF_FALSE_OR_POP' and o.type == 'POP_JUMP_IF_FALSE':
            return 0
        if t in JUMP_OPs:
            # ignore offset
            return cmp(t, o.type)
        return cmp(t, o.type) or cmp(self.pattr, o.pattr)
    def __repr__(self):
        return '%s %s (%s)' % (str(self.type), str(self.attr),
                       repr(self.pattr))
    def __str__(self):
        return '%s\t%-17s %r' % (self.offset, self.type, self.pattr)
def compare_code_with_srcfile(pyc_filename, src_filename):
    """Compare a .pyc with a source code file.

    Loads/compiles both and raises a CmpError* subclass on mismatch.
    """
    version, code_obj1 = uncompyle2._load_module(pyc_filename)
    code_obj2 = uncompyle2._load_file(src_filename)
    cmp_code_objects(version, code_obj1, code_obj2)
def compare_files(pyc_filename1, pyc_filename2):
    """Compare two .pyc files.

    NOTE(review): the first file's bytecode version is used for both;
    comparing .pyc files of different versions is not supported.
    """
    version, code_obj1 = uncompyle2._load_module(pyc_filename1)
    version, code_obj2 = uncompyle2._load_module(pyc_filename2)
    cmp_code_objects(version, code_obj1, code_obj2)
if __name__ == '__main__':
    # Tiny manual self-test of the relaxed Token comparison: equal types
    # but different attrs/offsets should still compare equal.
    # (`x` is the deprecated Python 2 backquote synonym for repr(x).)
    t1 = Token('LOAD_CONST', None, 'code_object _expandLang', 52)
    t2 = Token('LOAD_CONST', -421, 'code_object _expandLang', 55)
    print `t1`
    print `t2`
    print cmp(t1, t2), cmp(t1.type, t2.type), cmp(t1.attr, t2.attr)

187
build/scripts-2.7/uncompyle2 Executable file
View File

@@ -0,0 +1,187 @@
#!/usr/bin/python2.7
# Mode: -*- python -*-
#
# Copyright (c) 2000-2002 by hartmut Goebel <hartmut@goebel.noris.de>
#
"""
Usage: uncompyle [OPTIONS]... [ FILE | DIR]...
Examples:
  uncompyle      foo.pyc bar.pyc       # uncompyle foo.pyc, bar.pyc to stdout
  uncompyle -o . foo.pyc bar.pyc       # uncompyle to ./foo.dis and ./bar.dis
  uncompyle -o /tmp /usr/lib/python1.5 # uncompyle whole library
Options:
  -o <path>     output decompiled files to this path:
                if multiple input files are decompiled, the common prefix
                is stripped from these names and the remainder appended to
                <path>
                  uncompyle -o /tmp bla/fasel.pyc bla/foo.pyc
                    -> /tmp/fasel.dis, /tmp/foo.dis
                  uncompyle -o /tmp bla/fasel.pyc bar/foo.pyc
                    -> /tmp/bla/fasel.dis, /tmp/bar/foo.dis
                  uncompyle -o /tmp /usr/lib/python1.5
                    -> /tmp/smtplib.dis ... /tmp/lib-tk/FixTk.dis
  -c <file>     attempts a disassembly after compiling <file>
  -d            do not print timestamps
  -p <integer>  use <integer> number of processes
  -r            recurse directories looking for .pyc and .pyo files
  --verify      compare generated source with input byte-code
                (requires -o)
  --help        show this message
Debugging Options:
  --showasm   -a  include byte-code       (disables --verify)
  --showast   -t  include AST (abstract syntax tree) (disables --verify)
Extensions of generated files:
  '.dis'             successfully decompiled (and verified if --verify)
  '.dis_unverified'  successfully decompile but --verify failed
  '.nodis'           uncompyle failed (contact author for enhancement)
"""
Usage_short = \
"decomyple [--help] [--verify] [--showasm] [--showast] [-o <path>] FILE|DIR..."
import sys, os, getopt
import os.path
from uncompyle2 import main, verify
import time
# The decompiler reads 2.5-2.7 bytecode but must itself run under 2.7.
if sys.version[:3] != '2.7':
    print >>sys.stderr, 'Error: uncompyle2 requires Python 2.7.'
    sys.exit(-1)
# Flags are 1/0 ints used as booleans throughout.
showasm = showast = do_verify = numproc = recurse_dirs = 0
outfile = '-'
out_base = None
codes = []
timestamp = True
timestampfmt = "# %Y.%m.%d %H:%M:%S %Z"
try:
    opts, files = getopt.getopt(sys.argv[1:], 'hatdro:c:p:',
                               ['help', 'verify', 'showast', 'showasm'])
except getopt.GetoptError, e:
    print >>sys.stderr, '%s: %s' % (os.path.basename(sys.argv[0]), e)
    sys.exit(-1)
for opt, val in opts:
    if opt in ('-h', '--help'):
        print __doc__
        sys.exit(0)
    elif opt == '--verify':
        do_verify = 1
    elif opt in ('--showasm', '-a'):
        showasm = 1
        do_verify = 0
    elif opt in ('--showast', '-t'):
        showast = 1
        do_verify = 0
    elif opt == '-o':
        outfile = val
    elif opt == '-d':
        timestamp = False
    elif opt == '-c':
        codes.append(val)
    elif opt == '-p':
        numproc = int(val)
    elif opt == '-r':
        recurse_dirs = 1
    else:
        print opt
        print Usage_short
        sys.exit(1)
# expand directory if specified
if recurse_dirs:
    expanded_files = []
    for f in files:
        if os.path.isdir(f):
            for root, _, dir_files in os.walk(f):
                for df in dir_files:
                    if df.endswith('.pyc') or df.endswith('.pyo'):
                        expanded_files.append(os.path.join(root, df))
    files = expanded_files
# argl, commonprefix works on strings, not on path parts,
# thus we must handle the case with files in 'some/classes'
# and 'some/cmds'
src_base = os.path.commonprefix(files)
if src_base[-1:] != os.sep:
    src_base = os.path.dirname(src_base)
if src_base:
    sb_len = len( os.path.join(src_base, '') )
    files = map(lambda f: f[sb_len:], files)
    del sb_len
if outfile == '-':
    outfile = None # use stdout
elif outfile and os.path.isdir(outfile):
    out_base = outfile; outfile = None
elif outfile and len(files) > 1:
    out_base = outfile; outfile = None
if timestamp:
    print time.strftime(timestampfmt)
if numproc <= 1:
    # Single-process mode: decompile everything in this process.
    try:
        result = main(src_base, out_base, files, codes, outfile, showasm, showast, do_verify)
        print '# decompiled %i files: %i okay, %i failed, %i verify failed' % result
    except (KeyboardInterrupt):
        pass
    except verify.VerifyCmpError:
        raise
else:
    # Multi-process mode: workers pull filenames from fqueue and push
    # their result counters to rqueue.
    from multiprocessing import Process, Queue
    from Queue import Empty
    fqueue = Queue(len(files)+numproc)
    for f in files:
        fqueue.put(f)
    # One None sentinel per worker tells it to stop.
    for i in range(numproc):
        fqueue.put(None)
    rqueue = Queue(numproc)
    def process_func():
        try:
            (tot_files, okay_files, failed_files, verify_failed_files) = (0,0,0,0)
            while 1:
                f = fqueue.get()
                if f == None:
                    break
                # NOTE(review): 'f' is reused for the failed-file count
                # below; harmless, since it is reassigned from the queue
                # at the top of each iteration.
                (t, o, f, v) = \
                    main(src_base, out_base, [f], codes, outfile, showasm, showast, do_verify)
                tot_files += t
                okay_files += o
                failed_files += f
                verify_failed_files += v
        except (Empty, KeyboardInterrupt):
            pass
        rqueue.put((tot_files, okay_files, failed_files, verify_failed_files))
        rqueue.close()
    try:
        procs = [Process(target=process_func) for i in range(numproc)]
        for p in procs:
            p.start()
        for p in procs:
            p.join()
        try:
            # Sum the per-worker counters until the queue is drained.
            (tot_files, okay_files, failed_files, verify_failed_files) = (0,0,0,0)
            while 1:
                (t, o, f, v) = rqueue.get(False)
                tot_files += t
                okay_files += o
                failed_files += f
                verify_failed_files += v
        except Empty:
            pass
        print '# decompiled %i files: %i okay, %i failed, %i verify failed' % \
              (tot_files, okay_files, failed_files, verify_failed_files)
    except (KeyboardInterrupt, OSError):
        pass
if timestamp:
    print time.strftime(timestampfmt)

8
compile_tests Executable file
View File

@@ -0,0 +1,8 @@
#!/bin/sh
# Compile the decompyle test patterns with every Python interpreter found
# on PATH, both normally and with -O (optimized) bytecode, so the test
# suite has .pyc and .pyo inputs for each version.
for ver in 1.4 1.5 1.6 2 2.0 2.1 2.2 2.3 2.4 2.5 2.6 2.7; do
	which python$ver > /dev/null 2>&1 && \
	( python$ver test/compile_tests
	  python$ver -O test/compile_tests
	)
done

187
scripts/uncompyle2 Executable file
View File

@@ -0,0 +1,187 @@
#!/usr/bin/env python2.7
# Mode: -*- python -*-
#
# Copyright (c) 2000-2002 by hartmut Goebel <hartmut@goebel.noris.de>
#
"""
Usage: uncompyle [OPTIONS]... [ FILE | DIR]...
Examples:
  uncompyle      foo.pyc bar.pyc       # uncompyle foo.pyc, bar.pyc to stdout
  uncompyle -o . foo.pyc bar.pyc       # uncompyle to ./foo.dis and ./bar.dis
  uncompyle -o /tmp /usr/lib/python1.5 # uncompyle whole library
Options:
  -o <path>     output decompiled files to this path:
                if multiple input files are decompiled, the common prefix
                is stripped from these names and the remainder appended to
                <path>
                  uncompyle -o /tmp bla/fasel.pyc bla/foo.pyc
                    -> /tmp/fasel.dis, /tmp/foo.dis
                  uncompyle -o /tmp bla/fasel.pyc bar/foo.pyc
                    -> /tmp/bla/fasel.dis, /tmp/bar/foo.dis
                  uncompyle -o /tmp /usr/lib/python1.5
                    -> /tmp/smtplib.dis ... /tmp/lib-tk/FixTk.dis
  -c <file>     attempts a disassembly after compiling <file>
  -d            do not print timestamps
  -p <integer>  use <integer> number of processes
  -r            recurse directories looking for .pyc and .pyo files
  --verify      compare generated source with input byte-code
                (requires -o)
  --help        show this message
Debugging Options:
  --showasm   -a  include byte-code       (disables --verify)
  --showast   -t  include AST (abstract syntax tree) (disables --verify)
Extensions of generated files:
  '.dis'             successfully decompiled (and verified if --verify)
  '.dis_unverified'  successfully decompile but --verify failed
  '.nodis'           uncompyle failed (contact author for enhancement)
"""
Usage_short = \
"decomyple [--help] [--verify] [--showasm] [--showast] [-o <path>] FILE|DIR..."
import sys, os, getopt
import os.path
from uncompyle2 import main, verify
import time
# The decompiler reads 2.5-2.7 bytecode but must itself run under 2.7.
if sys.version[:3] != '2.7':
    print >>sys.stderr, 'Error: uncompyle2 requires Python 2.7.'
    sys.exit(-1)
# Flags are 1/0 ints used as booleans throughout.
showasm = showast = do_verify = numproc = recurse_dirs = 0
outfile = '-'
out_base = None
codes = []
timestamp = True
timestampfmt = "# %Y.%m.%d %H:%M:%S %Z"
try:
    opts, files = getopt.getopt(sys.argv[1:], 'hatdro:c:p:',
                               ['help', 'verify', 'showast', 'showasm'])
except getopt.GetoptError, e:
    print >>sys.stderr, '%s: %s' % (os.path.basename(sys.argv[0]), e)
    sys.exit(-1)
for opt, val in opts:
    if opt in ('-h', '--help'):
        print __doc__
        sys.exit(0)
    elif opt == '--verify':
        do_verify = 1
    elif opt in ('--showasm', '-a'):
        showasm = 1
        do_verify = 0
    elif opt in ('--showast', '-t'):
        showast = 1
        do_verify = 0
    elif opt == '-o':
        outfile = val
    elif opt == '-d':
        timestamp = False
    elif opt == '-c':
        codes.append(val)
    elif opt == '-p':
        numproc = int(val)
    elif opt == '-r':
        recurse_dirs = 1
    else:
        print opt
        print Usage_short
        sys.exit(1)
# expand directory if specified
if recurse_dirs:
    expanded_files = []
    for f in files:
        if os.path.isdir(f):
            for root, _, dir_files in os.walk(f):
                for df in dir_files:
                    if df.endswith('.pyc') or df.endswith('.pyo'):
                        expanded_files.append(os.path.join(root, df))
    files = expanded_files
# argl, commonprefix works on strings, not on path parts,
# thus we must handle the case with files in 'some/classes'
# and 'some/cmds'
src_base = os.path.commonprefix(files)
if src_base[-1:] != os.sep:
    src_base = os.path.dirname(src_base)
if src_base:
    sb_len = len( os.path.join(src_base, '') )
    files = map(lambda f: f[sb_len:], files)
    del sb_len
if outfile == '-':
    outfile = None # use stdout
elif outfile and os.path.isdir(outfile):
    out_base = outfile; outfile = None
elif outfile and len(files) > 1:
    out_base = outfile; outfile = None
if timestamp:
    print time.strftime(timestampfmt)
if numproc <= 1:
    # Single-process mode: decompile everything in this process.
    try:
        result = main(src_base, out_base, files, codes, outfile, showasm, showast, do_verify)
        print '# decompiled %i files: %i okay, %i failed, %i verify failed' % result
    except (KeyboardInterrupt):
        pass
    except verify.VerifyCmpError:
        raise
else:
    # Multi-process mode: workers pull filenames from fqueue and push
    # their result counters to rqueue.
    from multiprocessing import Process, Queue
    from Queue import Empty
    fqueue = Queue(len(files)+numproc)
    for f in files:
        fqueue.put(f)
    # One None sentinel per worker tells it to stop.
    for i in range(numproc):
        fqueue.put(None)
    rqueue = Queue(numproc)
    def process_func():
        try:
            (tot_files, okay_files, failed_files, verify_failed_files) = (0,0,0,0)
            while 1:
                f = fqueue.get()
                if f == None:
                    break
                # NOTE(review): 'f' is reused for the failed-file count
                # below; harmless, since it is reassigned from the queue
                # at the top of each iteration.
                (t, o, f, v) = \
                    main(src_base, out_base, [f], codes, outfile, showasm, showast, do_verify)
                tot_files += t
                okay_files += o
                failed_files += f
                verify_failed_files += v
        except (Empty, KeyboardInterrupt):
            pass
        rqueue.put((tot_files, okay_files, failed_files, verify_failed_files))
        rqueue.close()
    try:
        procs = [Process(target=process_func) for i in range(numproc)]
        for p in procs:
            p.start()
        for p in procs:
            p.join()
        try:
            # Sum the per-worker counters until the queue is drained.
            (tot_files, okay_files, failed_files, verify_failed_files) = (0,0,0,0)
            while 1:
                (t, o, f, v) = rqueue.get(False)
                tot_files += t
                okay_files += o
                failed_files += f
                verify_failed_files += v
        except Empty:
            pass
        print '# decompiled %i files: %i okay, %i failed, %i verify failed' % \
              (tot_files, okay_files, failed_files, verify_failed_files)
    except (KeyboardInterrupt, OSError):
        pass
if timestamp:
    print time.strftime(timestampfmt)

8
setup.cfg Executable file
View File

@@ -0,0 +1,8 @@
[bdist_rpm]
release = 1
packager = Hartmut Goebel <hartmut.goebel@noris.net>
doc_files = README
# CHANGES.txt
# USAGE.txt
# doc/
# examples/

15
setup.py Executable file
View File

@@ -0,0 +1,15 @@
#!/usr/bin/env python
"""Setup script for the 'uncompyle' distribution."""
# NOTE(review): Extension is imported but never used here; it is kept because
# removing a file-level import could affect other tooling that reads this file.
from distutils.core import setup, Extension
setup (name = "uncompyle2",
       version = "1.1",
       description = "Python byte-code to source-code converter",
       author = "Hartmut Goebel",
       author_email = "hartmut@oberon.noris.de",
       url = "http://github.com/sysfrog/uncompyle",
       packages=['uncompyle2', 'uncompyle2.opcode'],
       scripts=['scripts/uncompyle2'],
      )

80
test/compile_tests Executable file
View File

@@ -0,0 +1,80 @@
#!/usr/bin/env python2.5
"""
compile_tests -- compile test patterns for the decompyle test suite
This source is part of the decompyle test suite.
decompyle is a Python byte-code decompiler
See http://www.goebel-consult.de/decompyle/ for download and
for further information
"""
import py_compile, os, sys, getopt
work_dir = os.path.dirname(sys.argv[0])
src_dir = work_dir
opts, args = getopt.getopt(sys.argv[1:], 's:w:')
for opt, val in opts:
if opt == '-s':
src_dir = val
if opt == '-w':
work_dir = val
else:
raise "Unknown Option '%s'" % opt
if args:
raise 'This tool does not want any arguments'
print "Using files in dir %s" % src_dir
print "Compiling into dir %s" % work_dir
tests = {}
tests['1.5'] = ["class", "del", "docstring", 'empty', "exec",
"exceptions", "expressions", "functions", "global",
"globals", "import", "integers", "lambda", "loops",
"misc", "nested_elif", "prettyprint", "print",
'single_stmt', "slices", "tuple_params", 'tuples']
tests['1.6'] = ["applyEquiv", ] + tests['1.5']
tests['2.0'] = ["augmentedAssign", "extendedImport", "extendedPrint",
"import_as", "listComprehensions", 'print_to'] + \
tests['1.6'] # [ "--extendedarg", ]
tests['2.1'] = ['loops2', 'nested_scopes'] + tests['2.0']
tests['2.2'] = ['divide_future', 'divide_no_future', 'iterators',
'yield'] + tests['2.1']
tests['2.3'] = tests['2.2']
tests['2.5'] = tests['2.3']
tests['2.6'] = tests['2.5']
tests['2.7'] = ['mine'] + tests['2.6']
total_tests = len(tests['2.7'])
#tests['2.2'].sort(); print tests['2.2']
extension = '.py' + (__debug__ and 'c' or 'o')
def compile(file, target_dir):
sfile = os.path.join(src_dir, 'test_%s.py' % file)
cfile = os.path.join(target_dir, 'test_%s%s' % (file, extension) )
py_compile.compile(sfile, cfile=cfile)
def compile_for_version(version):
target_dir = os.path.join(work_dir, 'bytecode_' + version)
if not os.path.exists(target_dir):
os.mkdir(target_dir)
for file in tests[version]:
compile(file, target_dir)
try:
version = '%i.%i' % sys.version_info[:2]
except AttributeError:
version = sys.version[:3]
print 'Compiling test files for Python', version,
print '(%i/%i files)' % (len(tests[version]), total_tests)
compile_for_version(version)
print 'Done.'

30
test/test_applyEquiv.py Executable file
View File

@@ -0,0 +1,30 @@
# applyEquiv.py -- source test pattern for equivalents of 'apply'
#
# This simple program is part of the decompyle test suite.
#
# decompyle is a Python byte-code decompiler
# See http://www.goebel-consult.de/decompyle/ for download and
# for further information
# NOTE(review): 'args' and 'kwargs' are undefined at module level, so this
# fixture is only meant to be byte-compiled, never executed.
def kwfunc(**kwargs):
    print kwargs.items()
def argsfunc(*args):
    print args
def no_apply(*args, **kwargs):
    print args
    print kwargs.items()
argsfunc(34)
foo = argsfunc(*args)
argsfunc(*args)
argsfunc(34, *args)
kwfunc(**kwargs)
kwfunc(x=11, **kwargs)
no_apply(*args, **kwargs)
no_apply(34, *args, **kwargs)
no_apply(x=11, *args, **kwargs)
no_apply(34, x=11, *args, **kwargs)
no_apply(42, 34, x=11, *args, **kwargs)
no_apply(1,2,4,8,a=2,b=3,c=5)

55
test/test_augmentedAssign.py Executable file
View File

@@ -0,0 +1,55 @@
"""
augmentedAssign.py -- source test pattern for augmented assigns
This source is part of the decompyle test suite.
decompyle is a Python byte-code decompiler
See http://www.goebel-consult.de/decompyle/ for download and
for further information
"""
raise "This program can't be run"
a = 1
b = 2
a += b; print a # a = a+b = 3
a -= b; print a # a = a-b = 1
a *= b; print a # a = a*b = 2
a -= a; print a # a = a-a = 0
a += 7*3; print a # == 21
l= [1,2,3]
l[1] *= 3; print l[1]; # 6
l[1][2][3] = 7
l[1][2][3] *= 3;
l[:] += [9]; print l
l[:2] += [9]; print l
l[1:] += [9]; print l
l[1:4] += [9]; print l
l += [42,43]; print l
a.value = 1
a.value += 1;
a.b.val = 1
a.b.val += 1;
l = []
for i in range(3):
lj = []
for j in range(3):
lk = []
for k in range(3):
lk.append(0)
lj.append(lk)
l.append(lj)
i = j = k = 1
def f():
global i
i += 1
return i
l[i][j][k] = 1
i = 1
l[f()][j][k] += 1
print i, l

43
test/test_class.py Executable file
View File

@@ -0,0 +1,43 @@
"""
test_class.py -- source test pattern for class definitions
This source is part of the decompyle test suite.
decompyle is a Python byte-code decompiler
See http://www.goebel-consult.de/decompyle/ for download and
for further information
"""
class A:
class A1:
def __init__(self):
print 'A1.__init__'
def foo(self):
print 'A1.foo'
def __init__(self):
print 'A.__init__'
def foo(self):
print 'A.foo'
class B:
def __init__(self):
print 'B.__init__'
def bar(self):
print 'B.bar'
class C(A,B):
def foobar(self):
print 'C.foobar'
c = C()
c.foo()
c.bar()
c.foobar()

35
test/test_del.py Executable file
View File

@@ -0,0 +1,35 @@
"""
test_del.py -- source test pattern for 'del' statements
This source is part of the decompyle test suite.
Snippet taken from python libs's test_class.py
decompyle is a Python byte-code decompiler
See http://www.goebel-consult.de/decompyle/ for download and
for further information
"""
raise "This program can't be run"
print 0
a = b[5]
print 1
del a
print 2
del b[5]
print 3
del testme[1]
print 4
del testme[:]
print '4a'
del testme[:42]
print '4b'
del testme[40:42]
print 5
del testme[2:1024:10]
print '5a'
del testme[40,41,42]
print 6
del testme[:42, ..., :24:, 24, 100]
print 7

6
test/test_divide_future.py Executable file
View File

@@ -0,0 +1,6 @@
# decompyle fixture: '/' is true division under this __future__ import.
from __future__ import division
print ' 1 // 2 =', 1 // 2
print '1.0 // 2.0 =', 1.0 // 2.0
print ' 1 / 2 =', 1 / 2
print '1.0 / 2.0 =', 1.0 / 2.0

6
test/test_divide_no_future.py Executable file
View File

@@ -0,0 +1,6 @@
#from __future__ import division
# decompyle fixture: same expressions as test_divide_future, but with classic
# (truncating) integer division because the __future__ import stays commented.
print ' 1 // 2 =', 1 // 2
print '1.0 // 2.0 =', 1.0 // 2.0
print ' 1 / 2 =', 1 / 2
print '1.0 / 2.0 =', 1.0 / 2.0

41
test/test_docstring.py Executable file
View File

@@ -0,0 +1,41 @@
# docstring.py -- source test pattern for doc strings
#
# This simple program is part of the decompyle test suite.
#
# decompyle is a Python byte-code decompiler
# See http://www.goebel-consult.de/decompyle/ for download and
# for further information
'''
This is a doc string
'''
# NOTE(review): nesting below reconstructed from an indentation-stripped dump;
# the methods deliberately lack 'self' -- this fixture is compile-only.
def Doc_Test():
    """This has to be present"""
class XXX:
    def __init__(self):
        """__init__: This has to be present"""
        self.a = 1
    def XXX22():
        """XXX22: This has to be present"""
        pass
    def XXX11():
        """XXX22: This has to be present"""
        pass
    def XXX12():
        foo = """XXX22: This has to be present"""
        pass
    def XXX13():
        pass
def Y11():
    def Y22():
        def Y33():
            pass
print __doc__

0
test/test_empty.py Executable file
View File

107
test/test_exceptions.py Executable file
View File

@@ -0,0 +1,107 @@
# decompyle fixture: try/except[/else] and if/elif shapes for the decompiler.
# NOTE(review): the trailing 'b = ...' separator assignments give each block a
# following statement; placement reconstructed from an indentation-stripped dump.
import dis
def x11():
    try:
        a = 'try except'
    except:
        a = 2
    b = '--------'
def x12():
    try:
        a = 'try except else(pass)'
    except:
        a = 2
    b = '--------'
def x13():
    try:
        a = 'try except else(a=3)'
    except:
        a = 2
    else:
        a = 3
    b = '--------'
def x21():
    try:
        a = 'try KeyError'
    except KeyError:
        a = 8
    b = '--------'
def x22():
    try:
        a = 'try (IdxErr, KeyError) else(pass)'
    except (IndexError, KeyError):
        a = 8
    b = '--------'
def x23():
    try:
        a = 'try KeyError else(a=9)'
    except KeyError:
        a = 8
    else:
        a = 9
    b = '--------'
def x31():
    try:
        a = 'try KeyError IndexError'
    except KeyError:
        a = 8
    except IndexError:
        a = 9
    b = '--------'
def x32():
    try:
        a = 'try KeyError IndexError else(pass)'
    except KeyError:
        a = 8
    except IndexError:
        a = 9
    b = '--------'
def x33():
    try:
        a = 'try KeyError IndexError else(a=9)'
    except KeyError:
        a = 8
    except IndexError:
        a = 9
    else:
        a = 9
    b = '#################'
def x41():
    # NOTE(review): reads 'a'/'b' before assignment -- compile-only fixture.
    if (a == 1):
        a = 1
    elif (b == 1):
        b = 1
    else:
        c = 1
    b = '#################'
def x42():
    if (a == 1):
        a = 1
    elif (b == 1):
        b = 1
    else:
        c = 1
    xxx = 'mmm'
if (__name__ == '__main__'):
    # NOTE(review): 'xx' is undefined here -- presumably never run as a script.
    dis.dis(xx)

13
test/test_exec.py Executable file
View File

@@ -0,0 +1,13 @@
# exec.py -- source test pattern for exec statement
#
# This simple program is part of the decompyle test suite.
#
# decompyle is a Python byte-code decompiler
# See http://www.goebel-consult.de/decompyle/ for download and
# for further information
# NOTE: exercises all three exec forms (bare, 'in globals', 'in g, l').
testcode = 'a = 12'
exec testcode
exec testcode in globals()
exec testcode in globals(), locals()

13
test/test_expressions.py Executable file
View File

@@ -0,0 +1,13 @@
# expressions.py -- source test pattern for expressions
#
# This simple program is part of the decompyle test suite.
#
# decompyle is a Python byte-code decompiler
# See http://www.goebel-consult.de/decompyle/ for download and
# for further information
def _lsbStrToInt(str):
    # Decode the first four bytes of 'str' as a little-endian integer.
    return ord(str[0]) + \
           (ord(str[1]) << 8) + \
           (ord(str[2]) << 16) + \
           (ord(str[3]) << 24)

18
test/test_extendedImport.py Executable file
View File

@@ -0,0 +1,18 @@
# extendedImport.py -- source test pattern for extended import statements
#
# This simple program is part of the decompyle test suite.
#
# decompyle is a Python byte-code decompiler
# See http://www.goebel-consult.de/decompyle/ for download and
# for further information
import os, sys as System, time
import sys
from rfc822 import Message as Msg822
from mimetools import Message as MimeMsg, decode, choose_boundary as MimeBoundary
import test.test_StringIO as StringTest
for k, v in globals().items():
    print `k`, v

14
test/test_extendedPrint.py Executable file
View File

@@ -0,0 +1,14 @@
# extendedPrint.py -- source test pattern for extended print statements
#
# This simple program is part of the decompyle test suite.
#
# decompyle is a Python byte-code decompiler
# See http://www.goebel-consult.de/decompyle/ for download and
# for further information
import sys
print >> sys.stdout, "Hello World"
print >> sys.stdout, 1,2,3
print >> sys.stdout, 1,2,3,
print >> sys.stdout

65547
test/test_extendedarg.py Executable file

File diff suppressed because it is too large Load Diff

58
test/test_functions.py Executable file
View File

@@ -0,0 +1,58 @@
# test_functions.py -- source test pattern for functions
#
# This source is part of the decompyle test suite.
#
# decompyle is a Python byte-code decompiler
# See http://www.goebel-consult.de/decompyle/ for download and
# for further information
# NOTE: covers every signature shape: plain, *args, **kwargs, defaults,
# and Python-2-only tuple parameters.
def x0():
    pass
def x1(arg1):
    pass
def x2(arg1,arg2):
    pass
def x3a(*args):
    pass
def x3b(**kwargs):
    pass
def x3c(*args, **kwargs):
    pass
def x4a(foo, bar=1, bla=2, *args):
    pass
def x4b(foo, bar=1, bla=2, **kwargs):
    pass
def x4c(foo, bar=1, bla=2, *args, **kwargs):
    pass
def func_with_tuple_args((a,b)):
    print a
    print b
def func_with_tuple_args2((a,b), (c,d)):
    print a
    print c
def func_with_tuple_args3((a,b), (c,d), *args):
    print a
    print c
def func_with_tuple_args4((a,b), (c,d), **kwargs):
    print a
    print c
def func_with_tuple_args5((a,b), (c,d), *args, **kwargs):
    print a
    print c
def func_with_tuple_args6((a,b), (c,d)=(2,3), *args, **kwargs):
    print a
    print c

26
test/test_global.py Executable file
View File

@@ -0,0 +1,26 @@
"""
test_global.py -- source test pattern for 'global' statement
This source is part of the decompyle test suite.
decompyle is a Python byte-code decompiler
See http://www.goebel-consult.de/decompyle/ for download and
for further information
"""
i = 1; j = 7
def a():
def b():
def c():
k = 34
global i
i = i+k
l = 42
c()
global j
j = j+l
b()
print i, j # should print 35, 49
a()
print i, j

18
test/test_globals.py Executable file
View File

@@ -0,0 +1,18 @@
# globals.py -- test for global symbols
#
# This simple program is part of the decompyle test suite.
#
# decompyle is a Python byte-code decompiler
# See http://www.goebel-consult.de/decompyle/ for download and
# for further information
def f():
    print x # would result in a 'NameError' or 'UnboundLocalError'
    x = x+1
    print x
raise "This program can't be run"
x = 1
f()
print x

21
test/test_import.py Executable file
View File

@@ -0,0 +1,21 @@
"""
test_import.py -- source test pattern for import statements
This source is part of the decompyle test suite.
decompyle is a Python byte-code decompiler
See http://www.goebel-consult.de/decompyle/ for download and
for further information
"""
import sys
import os, sys, BaseHTTPServer
import test.test_MimeWriter
from rfc822 import Message
from mimetools import Message, decode, choose_boundary
from os import *
for k, v in globals().items():
print `k`, v

23
test/test_import_as.py Executable file
View File

@@ -0,0 +1,23 @@
"""
test_import_as.py -- source test pattern for 'import .. as 'statements
This source is part of the decompyle test suite.
decompyle is a Python byte-code decompiler
See http://www.goebel-consult.de/decompyle/ for download and
for further information
"""
import sys as SYS
import os as OS, sys as SYSTEM, BaseHTTPServer as HTTPServ
import test.test_MimeWriter as Mime_Writer
from rfc822 import Message as MSG
from mimetools import Message as mimeMsg, decode, \
choose_boundary as mimeBoundry
print '---' * 20
for k, v in globals().items():
print k, repr(v)

33
test/test_integers.py Executable file
View File

@@ -0,0 +1,33 @@
"""
test_integers.py -- source test pattern for integers
This source is part of the decompyle test suite.
Snippet taken from python libs's test_class.py
decompyle is a Python byte-code decompiler
See http://www.goebel-consult.de/decompyle/ for download and
for further information
"""
import sys
#raise "This program can't be run"
i = 1
i = 42
i = -1
i = -42
i = sys.maxint
minint = -sys.maxint-1
print sys.maxint
print minint
print long(minint)-1
print
i = -2147483647 # == -maxint
print i, repr(i)
i = i-1
print i, repr(i)
i = -2147483648L # == minint == -maxint-1
print i, repr(i)
i = -2147483649L # == minint-1 == -maxint-2
print i, repr(i)

11
test/test_iterators.py Executable file
View File

@@ -0,0 +1,11 @@
# decompyle fixture: for-loops with and without an 'else' clause.
for i in range(20):
    print i,
print
for i in range(10):
    print i,
    #if i == 10: break
else:
    print 'The End'

16
test/test_lambda.py Executable file
View File

@@ -0,0 +1,16 @@
# lambda.py -- source test pattern for lambda functions
#
# This simple program is part of the decompyle test suite.
#
# decompyle is a Python byte-code decompiler
# See http://www.goebel-consult.de/decompyle/ for download and
# for further information
# NOTE: includes Python-2-only tuple-parameter lambdas.
palette = map(lambda a: (a,a,a), range(256))
palette = map(lambda (r,g,b): chr(r)+chr(g)+chr(b), palette)
palette = map(lambda r: r, palette)
palette = lambda (r,g,b,): r
palette = lambda (r): r
palette = lambda r: r
palette = lambda (r): r, palette

36
test/test_listComprehensions.py Executable file
View File

@@ -0,0 +1,36 @@
# listComprehensions.py -- source test pattern for list comprehensions
#
# This simple program is part of the decompyle test suite.
#
# decompyle is a Python byte-code decompiler
# See http://www.goebel-consult.de/decompyle/ for download and
# for further information
XXX = range(4)
print [i for i in XXX]
print
print [i for i in (1,2,3,4,)]
print
print [(i,1) for i in XXX]
print
print [i*2 for i in range(4)]
print
print [i*j for i in range(4)
           for j in range(7)]
print [i*2 for i in range(4) if i == 0 ]
print [(i,i**2) for i in range(4) if (i % 2) == 0 ]
print [i*j for i in range(4)
           if i == 2
           for j in range(7)
           if (i+i % 2) == 0 ]
seq1 = 'abc'
seq2 = (1,2,3)
[ (x,y) for x in seq1 for y in seq2 ]
def flatten(seq):
    # Flatten one level of nesting.
    return [x for subseq in seq for x in subseq]
print flatten([[0], [1,2,3], [4,5], [6,7,8,9], []])

48
test/test_loops.py Executable file
View File

@@ -0,0 +1,48 @@
"""
test_loops.py -- source test pattern for loops
This source is part of the decompyle test suite.
decompyle is a Python byte-code decompiler
See http://www.goebel-consult.de/decompyle/ for download and
for further information
"""
for i in range(10):
if i == 3:
continue
if i == 5:
break
print i,
else:
print 'Else'
print
for i in range(10):
if i == 3:
continue
print i,
else:
print 'Else'
i = 0
while i < 10:
i = i+1
if i == 3:
continue
if i == 5:
break
print i,
else:
print 'Else'
print
i = 0
while i < 10:
i = i+1
if i == 3:
continue
print i,
else:
print 'Else'

20
test/test_loops2.py Executable file
View File

@@ -0,0 +1,20 @@
"""
test_loops2.py -- source test pattern for loops (CONTINUE_LOOP)
This source is part of the decompyle test suite.
decompyle is a Python byte-code decompiler
See http://www.goebel-consult.de/decompyle/ for download and
for further information
"""
# This is a seperate test pattern, since 'continue' within 'try'
# was not allowed till Python 2.1
for term in args:
try:
print
continue
print
except:
pass

5
test/test_mine.py Executable file
View File

@@ -0,0 +1,5 @@
# decompyle fixture for generator expressions as bare call arguments.
# NOTE(review): xvec/yvec are undefined -- compile-only.
sum(i*i for i in range(10))
sum(x*y for x,y in zip(xvec, yvec))

32
test/test_misc.py Executable file
View File

@@ -0,0 +1,32 @@
# slices.py -- source test pattern for slices
#
# This simple program is part of the decompyle test suite.
# Snippet taken from python libs's test_class.py
#
# decompyle is a Python byte-code decompiler
# See http://www.goebel-consult.de/decompyle/ for download and
# for further information
raise "This program can't be run"
class A:
    def __init__(self, num):
        self.num = num
    def __repr__(self):
        return str(self.num)
b = []
for i in range(10):
    b.append(A(i))
for i in ('CALL_FUNCTION', 'CALL_FUNCTION_VAR',
          'CALL_FUNCTION_VAR_KW', 'CALL_FUNCTION_KW'):
    print i, '\t', len(i), len(i)-len('CALL_FUNCTION'),
    print (len(i)-len('CALL_FUNCTION')) / 3,
    print i[len('CALL_FUNCTION'):]
p2 = (0, 0, None)
if p2[2]:
    print 'has value'
else:
    print ' no value'

89
test/test_nested_elif.py Executable file
View File

@@ -0,0 +1,89 @@
# nested_elif.py -- source test pattern for nested elif
#
# This simple program is part of the decompyle test suite.
#
# decompyle is a Python byte-code decompiler
# See http://www.goebel-consult.de/decompyle/ for download and
# for further information
# NOTE(review): nesting reconstructed from an indentation-stripped dump; the
# point of the later blocks is an 'else' that contains MORE than a single
# 'if', so it must not be folded back into an 'elif'.
a = None
if a == 1:
    print '1'
elif a == 2:
    print '2'
if a == 1:
    print '1'
elif a == 2:
    print '2'
else:
    print 'other'
if a == 1:
    print '1'
elif a == 2:
    print '2'
elif a == 3:
    print '3'
else:
    print 'other'
if a == 1:
    print '1'
elif a == 2:
    print '2'
elif a == 3:
    print '3'
if a == 1:
    print '1'
else:
    if a == 2:
        print '2'
    else:
        if a == 3:
            print '3'
        else:
            print 'other'
if a == 1:
    print '1'
else:
    if a == 2:
        print '2'
    else:
        print 'more'
        if a == 3:
            print '3'
        else:
            print 'other'
if a == 1:
    print '1'
else:
    print 'more'
    if a == 2:
        print '2'
    else:
        if a == 3:
            print '3'
        else:
            print 'other'
if a == 1:
    print '1'
else:
    print 'more'
    if a == 2:
        print '2'
    else:
        print 'more'
        if a == 3:
            print '3'
        elif a == 4:
            print '4'
        elif a == 4:
            print '4'
        else:
            print 'other'

95
test/test_nested_scopes.py Executable file
View File

@@ -0,0 +1,95 @@
# test_nested_scopes.py -- source test pattern for nested scopes
#
# This source is part of the decompyle test suite.
#
# decompyle is a Python byte-code decompiler
# See http://www.goebel-consult.de/decompyle/ for download and
# for further information
# NOTE(review): nesting reconstructed from an indentation-stripped dump.
from __future__ import nested_scopes
blurb = 1
def k0():
    def l0(m=1):
        print
    l0()
def x0():
    def y0():
        print
    y0()
def x1():
    def y1():
        print 'y-blurb =', blurb
    y1()
def x2():
    def y2():
        print
    blurb = 2
    y2()
def x3a():
    def y3a(x):
        print 'y-blurb =', blurb, flurb
        print
    blurb = 3
    flurb = 7
    y3a(1)
    print 'x3a-blurb =', blurb
def x3():
    def y3(x):
        def z():
            blurb = 25
            print 'z-blurb =', blurb,
        z()
        print 'y-blurb =', blurb,
        print
    blurb = 3
    y3(1)
    print 'x3-blurb =', blurb
def x3b():
    def y3b(x):
        def z():
            print 'z-blurb =', blurb,
        blurb = 25
        z()
        print 'y-blurb =', blurb,
        print
    blurb = 3
    y3b(1)
    print 'x3-blurb =', blurb
def x4():
    def y4(x):
        def z():
            print 'z-blurb =', blurb
        z()
    global blurb
    blurb = 3
    y4(1)
def x():
    def y(x):
        print 'y-blurb =', blurb
    blurb = 2
    y(1)
def func_with_tuple_args6((a,b), (c,d)=(2,3), *args, **kwargs):
    def y(x):
        print 'y-a =', a
    print c
def find(self, name):
    # This is taken from 'What's new in Python 2.1' by amk
    L = filter(lambda x, name: x == name, self.list_attribute)
x0(); x1(); x2();
x3(); x3a(); x3b();
x4(); x()
print 'blurb =', blurb

139
test/test_prettyprint.py Executable file
View File

@@ -0,0 +1,139 @@
"""
test_prettyprint.py -- source test pattern for tesing the prettyprint
funcionality of decompyle
This source is part of the decompyle test suite.
decompyle is a Python byte-code decompiler
See http://www.goebel-consult.de/decompyle/ for download and
for further information
"""
import pprint
aa = 'aa'
dict0 = {
'a': 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
'b': 1234,
'd': aa,
aa: aa
}
dict = {
'a': 'aaa',
'b': 1234,
'c': { 'ca': 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
'cb': 1234,
'cc': None
},
'd': aa,
aa: aa,
'eee': { 'ca': 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
'cb': 1234,
'cc': None
},
'ff': aa,
}
list1 = [ '1aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
aa,
'1bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb',
'1ccccccccccccccccccccccccccccccccccccccccccc' ]
list2 = [ '2aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
[ '22aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
aa,
'22bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb',
'22ccccccccccccccccccccccccccccccccccccccccccc' ],
'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb',
'ccccccccccccccccccccccccccccccccccccccccccc' ]
tuple1 = ( '1aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
aa,
'1bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb',
'1ccccccccccccccccccccccccccccccccccccccccccc' )
tuple2 = ( '2aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
( '22aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
aa,
'22bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb',
'22ccccccccccccccccccccccccccccccccccccccccccc' ),
'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb',
'ccccccccccccccccccccccccccccccccccccccccccc' )
def funcA():
dict = {
'a': 'aaa',
'b': 1234,
'c': { 'ca': 'aaa',
'cb': 1234,
'cc': None
},
'd': aa,
aa: aa
}
list1 = [ '1aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
'1bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb',
aa,
'1ccccccccccccccccccccccccccccccccccccccccccc' ]
list2 = [ '2aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
[ '22aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
aa,
'22bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb',
'22ccccccccccccccccccccccccccccccccccccccccccc' ],
'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb',
'ccccccccccccccccccccccccccccccccccccccccccc' ]
tuple1 = ( '1aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
'1bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb',
aa,
'1ccccccccccccccccccccccccccccccccccccccccccc' )
tuple2 = ( '2aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
( '22aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
aa,
'22bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb',
'22ccccccccccccccccccccccccccccccccccccccccccc' ),
'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb',
'ccccccccccccccccccccccccccccccccccccccccccc' )
def funcAB():
dict = {
'a': 'aaa',
'b': 1234,
'c': { 'ca': 'aaa',
'cb': 1234,
'cc': None
},
'd': aa,
aa: aa
}
list1 = [ '1aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
'1bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb',
'1ccccccccccccccccccccccccccccccccccccccccccc' ]
list2 = [ '2aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
[ '22aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
'22bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb',
'22ccccccccccccccccccccccccccccccccccccccccccc' ],
'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb',
'ccccccccccccccccccccccccccccccccccccccccccc' ]
tuple1 = ( '1aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
'1bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb',
'1ccccccccccccccccccccccccccccccccccccccccccc' )
tuple2 = ( '2aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
( '22aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
'22bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb',
'22ccccccccccccccccccccccccccccccccccccccccccc' ),
'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb',
'ccccccccccccccccccccccccccccccccccccccccccc' )
pprint.pprint(dict0)
print
pprint.pprint(dict)
print
pprint = pprint.PrettyPrinter(indent=2)
pprint.pprint(dict0)
print
pprint.pprint(dict)
print
pprint.pprint(list1)
print
pprint.pprint(list2)

16
test/test_print.py Executable file
View File

@@ -0,0 +1,16 @@
# print.py -- source test pattern for print statements
#
# This simple program is part of the decompyle test suite.
#
# decompyle is a Python byte-code decompiler
# See http://www.goebel-consult.de/decompyle/ for download and
# for further information
# NOTE(review): 'b' is undefined below -- this fixture is compile-only.
print 1,2,3,4,5
a = b + 5
print 1,2,3,4,5
print 1,2,3,4,5
print
print
print 1,2,3,4,5
print

21
test/test_print_to.py Executable file
View File

@@ -0,0 +1,21 @@
"""
print_to.py -- source test pattern for 'print >> ...' statements
This source is part of the decompyle test suite.
decompyle is a Python byte-code decompiler
See http://www.goebel-consult.de/decompyle/ for download and
for further information
"""
import sys
print >>sys.stdout, 1,2,3,4,5
print >>sys.stdout, 1,2,3,4,5,
print >>sys.stdout
print >>sys.stdout, 1,2,3,4,5,
print >>sys.stdout, 1,2,3,4,5,
print >>sys.stdout
print >>sys.stdout

1
test/test_single_stmt.py Executable file
View File

@@ -0,0 +1 @@
# decompyle fixture: the minimal one-statement module.
print 5

43
test/test_slices.py Executable file
View File

@@ -0,0 +1,43 @@
"""
test_slices.py -- source test pattern for slices
This source is part of the decompyle test suite.
Snippet taken from python libs's test_class.py
decompyle is a Python byte-code decompiler
See http://www.goebel-consult.de/decompyle/ for download and
for further information
"""
raise "This program can't be run"
testme[1]
testme[1] = 1
del testme[1]
testme[:42]
testme[:42] = "The Answer"
del testme[:42]
testme[2:1024:]
testme[:1024:10]
testme[::]
testme[2:1024:10]
testme[2:1024:10] = "A lot"
del testme[2:1024:10]
testme[:42, ..., :24:, 24, 100]
testme[:42, ..., :24:, 24, 100] = "Strange"
del testme[:42, ..., :24:, 24, 100]
testme[:]
testme[:] = 'Take all'
del testme[:]
testme[40:42]
testme[40:42] = 'Three'
del testme[40:42]
testme[40,41,42]
testme[40,41,42] = 'Another Three'
del testme[40,41,42]

24
test/test_tuple_params.py Executable file
View File

@@ -0,0 +1,24 @@
"""
test_tuple_params.py -- source test pattern for formal parameters of type tuple
This source is part of the decompyle test suite.
decompyle is a Python byte-code decompiler
See http://www.goebel-consult.de/decompyle/ for download and
for further information
"""
def A(a,b,(x,y,z),c):
pass
def B(a,b=42,(x,y,z)=(1,2,3),c=17):
pass
def C((x,y,z)):
pass
def D((x,)):
pass
def E((x)):
pass

25
test/test_tuples.py Executable file
View File

@@ -0,0 +1,25 @@
"""
test_tuples.py -- source test pattern for tuples
This source is part of the decompyle test suite.
decompyle is a Python byte-code decompiler
See http://www.goebel-consult.de/decompyle/ for download and
for further information
"""
a = (1,)
b = (2,3)
a,b = (1,2)
a,b = ( (1,2), (3,4,5) )
x = {}
try:
x[1,2,3]
except:
pass
x[1,2,3] = 42
print x[1,2,3]
print x[(1,2,3)]
assert x[(1,2,3)] == x[1,2,3]
del x[1,2,3]

22
test/test_yield.py Executable file
View File

@@ -0,0 +1,22 @@
# decompyle fixture for generators ('yield' needs this import before 2.3).
from __future__ import generators
def inorder(t):
    # In-order traversal of a binary tree with .left/.label/.right nodes.
    if t:
        for x in inorder(t.left):
            yield x
        yield t.label
        for x in inorder(t.right):
            yield x
def generate_ints(n):
    for i in range(n):
        yield i*2
for i in generate_ints(5):
    print i,
print
gen = generate_ints(3)
print gen.next(),
print gen.next(),
print gen.next(),
# NOTE(review): this fourth next() exhausts the generator and raises
# StopIteration -- presumably intentional for the fixture.
print gen.next()

16
test_one Executable file
View File

@@ -0,0 +1,16 @@
#!/bin/sh
# test_one -- decompile a single compiled test case and page the output.
# Usage: test_one <test-name-or-path> [uncompyle options...]
# A bare test name (no directory part) is resolved inside $BASEDIR.
file=$1
shift
options=$@
BASEDIR=test/bytecode_2.6
#BASEDIR=test/bytecode_2.0
#BASEDIR=test/bytecode_2.1
#BASEDIR=test/bytecode_2.2
# BUGFIX: '==' inside [ ] is a bashism; POSIX sh requires '='.  Quoting
# $file keeps names with spaces working.
if [ "`dirname "$file"`" = '.' ] ; then
    file=$BASEDIR/test_$file.pyc
fi
# NOTE(review): setup.py installs 'scripts/uncompyle2'; confirm whether this
# path should be ./scripts/uncompyle2 as well.
python2.7 -u ./scripts/uncompyle $options "$file" 2>&1 |less

118
test_pythonlib Executable file
View File

@@ -0,0 +1,118 @@
#!/usr/bin/env python
# emacs-mode: -*-python-*-
"""
test_pythonlib -- uncompyle and verify Python libraries

Usage-Examples:
  test_pythonlib --all          # decompile all tests (suite + libs)
  test_pythonlib --all --verify # decomyile all tests and verify results
  test_pythonlib --test         # decompile only the testsuite
  test_pythonlib --2.2 --verify # decompile and verify python lib 2.2

Adding own test-trees:
Step 1) Edit this file and add a new entry to 'test_options', eg.
  test_options['mylib'] = ('/usr/lib/mylib', PYOC, 'mylib')
Step 2: Run the test:
  test_pythonlib --mylib          # decompile 'mylib'
  test_pythonlib --mylib --verify # decompile verify 'mylib'
"""
from uncompyle import main, verify
import os, time, shutil
from fnmatch import fnmatch
#----- configure this for your needs
target_base = '/tmp/py-dis/'
lib_prefix = '/usr/lib'
# Byte-code file patterns.
PYC = ('*.pyc', )
PYO = ('*.pyo', )
PYOC = ('*.pyc', '*.pyo')
test_options = {
    # name: (src_basedir, pattern, output_base_suffix)
    'test': ('./test', PYOC, 'test'),
    '1.5': (os.path.join(lib_prefix, 'python1.5'), PYC, 'python-lib1.5'),
    '1.6': (os.path.join(lib_prefix, 'python1.6'), PYC, 'python-lib1.6'),
    '2.0': (os.path.join(lib_prefix, 'python2.0'), PYC, 'python-lib2.0'),
    '2.1': (os.path.join(lib_prefix, 'python2.1'), PYC, 'python-lib2.1'),
    '2.2': (os.path.join(lib_prefix, 'python2.2'), PYC, 'python-lib2.2'),
    '2.5': (os.path.join(lib_prefix, 'python2.5'), PYC, 'python-lib2.5'),
    '2.6': (os.path.join(lib_prefix, 'python2.6'), PYC, 'python-lib2.6'),
    '2.7': (os.path.join(lib_prefix, 'python2.7'), PYC, 'python-lib2.7')
    }
#-----
def do_tests(src_dir, patterns, target_dir, start_with=None, do_verify=0):
    # Decompile (and optionally verify) every byte-code file below src_dir
    # that matches one of 'patterns', writing results below target_dir.
    def visitor(files, dirname, names):
        # os.path.walk callback: collect matching relative paths.
        files.extend(
            [os.path.normpath(os.path.join(dirname, n))
                 for n in names
                    for pat in patterns
                        if fnmatch(n, pat)])
    files = []
    cwd = os.getcwd()
    os.chdir(src_dir)
    os.path.walk(os.curdir, visitor, files)
    os.chdir(cwd)
    files.sort()
    if start_with:
        # Skip everything before the named file (useful to resume a run).
        try:
            start_with = files.index(start_with)
            files = files[start_with:]
            print '>>> starting with file', files[0]
        except ValueError:
            pass
    print time.ctime()
    main(src_dir, target_dir, files, [], do_verify=do_verify)
    print time.ctime()
if __name__ == '__main__':
    import getopt, sys
    do_verify = 0
    test_dirs = []
    start_with = None
    test_options_keys = test_options.keys(); test_options_keys.sort()
    opts, args = getopt.getopt(sys.argv[1:], '',
                               ['start-with=', 'verify', 'all', ] \
                               + test_options_keys )
    for opt, val in opts:
        if opt == '--verify':
            do_verify = 1
        elif opt == '--start-with':
            start_with = val
        elif opt[2:] in test_options_keys:
            test_dirs.append(test_options[opt[2:]])
        elif opt == '--all':
            for val in test_options_keys:
                test_dirs.append(test_options[val])
    # Each requested tree is decompiled into a fresh target directory.
    for src_dir, pattern, target_dir in test_dirs:
        if os.path.exists(src_dir):
            target_dir = os.path.join(target_base, target_dir)
            if os.path.exists(target_dir):
                shutil.rmtree(target_dir, ignore_errors=1)
            do_tests(src_dir, pattern, target_dir, start_with, do_verify)
        else:
            print '### skipping', src_dir
# python 1.5:
#   test/re_tests		memory error
#   test/test_b1		memory error
# Verification notes:
# - xdrlib fails verification due the same lambda used twice
#   (verification is successfull when using original .pyo as
#   input)
#

776
uncompyle2/Parser.py Executable file
View File

@@ -0,0 +1,776 @@
# Copyright (c) 1999 John Aycock
# Copyright (c) 2000-2002 by hartmut Goebel <hartmut@goebel.noris.de>
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
#
# See main module for license.
#
__all__ = ['parse', 'AST', 'ParserError', 'Parser']
from spark import GenericASTBuilder
import string, exceptions, sys
from UserList import UserList
from Scanner import Token
class AST(UserList):
    """
    Node of the abstract syntax tree built by the parser.

    A node has a `type` (the grammar symbol, interned for fast comparison)
    and its children live in the inherited UserList storage.  Comparing a
    node against a plain string compares the node type only, so grammar
    code can ask things like ``node == 'expr'``.
    """
    def __init__(self, type, kids=[]):
        # NOTE(review): the mutable default is harmless because
        # UserList.__init__ copies the initializer into self.data,
        # but it is a dated idiom.
        self.type = intern(type)
        UserList.__init__(self, kids)
    def __getslice__(self, low, high):
        return self.data[low:high]
    def __eq__(self, o):
        if not isinstance(o, AST):
            # bare string: compare against the node type only
            return self.type == o
        # full node: type must match and so must all children
        return self.type == o.type and UserList.__eq__(self, o)
    def __hash__(self):
        return hash(self.type)
    def __repr__(self, indent=''):
        # Render this node's type, then each child indented one space.
        text = str(self.type)
        for kid in self:
            text = text + '\n' + string.replace(str(kid), '\n', '\n ')
        return text
class ParserError(Exception):
    """
    Raised when the token stream cannot be parsed; remembers the
    offending token and its byte-code offset.
    """
    def __init__(self, token, offset):
        # Deliberately no Exception.__init__ call, matching the original
        # contract: args stays whatever the constructor received.
        self.token, self.offset = token, offset
    def __str__(self):
        return "Syntax error at or near `%r' token at offset %s" % (
            self.token, self.offset)
class Parser(GenericASTBuilder):
    """
    Byte-code grammar for the decompiler.

    IMPORTANT: the docstrings of the p_* methods below are not mere
    documentation -- spark's GenericASTBuilder harvests the '::=' production
    rules out of them to build the grammar.  Their text is behavior; do not
    reword it.
    """
    def __init__(self):
        # 'stmts' is the grammar's start symbol.
        GenericASTBuilder.__init__(self, AST, 'stmts')
        # Tracks variable-arity rules already added by parse() so the same
        # rule is never added twice to this (module-global, reused) parser.
        self.customized = {}
    def cleanup(self):
        """
        Remove recursive references to allow garbage
        collector to collect this object.
        """
        for dict in (self.rule2func, self.rules, self.rule2name):
            for i in dict.keys():
                dict[i] = None
        for i in dir(self):
            setattr(self, i, None)
    def error(self, token):
        # spark hook: called when no rule matches at `token`.
        raise ParserError(token, token.offset)
    def typestring(self, token):
        # spark hook: the grammar symbol of a Token is its type name.
        return token.type
    # --- Function definitions and decorators ---
    def p_funcdef(self, args):
        '''
        stmt ::= funcdef
        funcdef ::= mkfunc designator
        stmt ::= funcdefdeco
        funcdefdeco ::= mkfuncdeco designator
        mkfuncdeco ::= expr mkfuncdeco CALL_FUNCTION_1
        mkfuncdeco ::= expr mkfuncdeco0 CALL_FUNCTION_1
        mkfuncdeco0 ::= mkfunc
        load_closure ::= load_closure LOAD_CLOSURE
        load_closure ::= LOAD_CLOSURE
        '''
    # --- List comprehensions ---
    def p_list_comprehension(self, args):
        '''
        expr ::= list_compr
        list_compr ::= BUILD_LIST_0 list_iter
        list_iter ::= list_for
        list_iter ::= list_if
        list_iter ::= list_if_not
        list_iter ::= lc_body
        _come_from ::= COME_FROM
        _come_from ::=
        list_for ::= expr _for designator list_iter JUMP_BACK
        list_if ::= expr jmp_false list_iter
        list_if_not ::= expr jmp_true list_iter
        lc_body ::= expr LIST_APPEND
        '''
    # --- Set comprehensions (and shared comp_* infrastructure) ---
    def p_setcomp(self, args):
        '''
        expr ::= setcomp
        setcomp ::= LOAD_SETCOMP MAKE_FUNCTION_0 expr GET_ITER CALL_FUNCTION_1
        stmt ::= setcomp_func
        setcomp_func ::= BUILD_SET_0 LOAD_FAST FOR_ITER designator comp_iter
                JUMP_BACK RETURN_VALUE RETURN_LAST
        comp_iter ::= comp_if
        comp_iter ::= comp_ifnot
        comp_iter ::= comp_for
        comp_iter ::= comp_body
        comp_body ::= set_comp_body
        comp_body ::= gen_comp_body
        comp_body ::= dict_comp_body
        set_comp_body ::= expr SET_ADD
        gen_comp_body ::= expr YIELD_VALUE POP_TOP
        dict_comp_body ::= expr expr MAP_ADD
        comp_if ::= expr jmp_false comp_iter
        comp_ifnot ::= expr jmp_true comp_iter
        comp_for ::= expr _for designator comp_iter JUMP_BACK
        '''
    # --- Generator expressions ---
    def p_genexpr(self, args):
        '''
        expr ::= genexpr
        genexpr ::= LOAD_GENEXPR MAKE_FUNCTION_0 expr GET_ITER CALL_FUNCTION_1
        stmt ::= genexpr_func
        genexpr_func ::= LOAD_FAST FOR_ITER designator comp_iter JUMP_BACK
        '''
    # --- Dict comprehensions ---
    def p_dictcomp(self, args):
        '''
        expr ::= dictcomp
        dictcomp ::= LOAD_DICTCOMP MAKE_FUNCTION_0 expr GET_ITER CALL_FUNCTION_1
        stmt ::= dictcomp_func
        dictcomp_func ::= BUILD_MAP LOAD_FAST FOR_ITER designator
                comp_iter JUMP_BACK RETURN_VALUE RETURN_LAST
        '''
    # --- Augmented assignment (+=, -=, ...) ---
    def p_augmented_assign(self, args):
        '''
        stmt ::= augassign1
        stmt ::= augassign2
        augassign1 ::= expr expr inplace_op designator
        augassign1 ::= expr expr inplace_op ROT_THREE STORE_SUBSCR
        augassign1 ::= expr expr inplace_op ROT_TWO   STORE_SLICE+0
        augassign1 ::= expr expr inplace_op ROT_THREE STORE_SLICE+1
        augassign1 ::= expr expr inplace_op ROT_THREE STORE_SLICE+2
        augassign1 ::= expr expr inplace_op ROT_FOUR  STORE_SLICE+3
        augassign2 ::= expr DUP_TOP LOAD_ATTR expr
                inplace_op ROT_TWO   STORE_ATTR
        inplace_op ::= INPLACE_ADD
        inplace_op ::= INPLACE_SUBTRACT
        inplace_op ::= INPLACE_MULTIPLY
        inplace_op ::= INPLACE_DIVIDE
        inplace_op ::= INPLACE_TRUE_DIVIDE
        inplace_op ::= INPLACE_FLOOR_DIVIDE
        inplace_op ::= INPLACE_MODULO
        inplace_op ::= INPLACE_POWER
        inplace_op ::= INPLACE_LSHIFT
        inplace_op ::= INPLACE_RSHIFT
        inplace_op ::= INPLACE_AND
        inplace_op ::= INPLACE_XOR
        inplace_op ::= INPLACE_OR
        '''
    # --- Plain, chained and tuple assignment ---
    def p_assign(self, args):
        '''
        stmt ::= assign
        assign ::= expr DUP_TOP designList
        assign ::= expr designator
        stmt ::= assign2
        stmt ::= assign3
        assign2 ::= expr expr ROT_TWO designator designator
        assign3 ::= expr expr expr ROT_THREE ROT_TWO designator designator designator
        '''
    # --- print statement ---
    def p_print(self, args):
        '''
        stmt ::= print_items_stmt
        stmt ::= print_nl
        stmt ::= print_items_nl_stmt
        print_items_stmt ::= expr PRINT_ITEM print_items_opt
        print_items_nl_stmt ::= expr PRINT_ITEM print_items_opt PRINT_NEWLINE_CONT
        print_items_opt ::= print_items
        print_items_opt ::=
        print_items ::= print_items print_item
        print_items ::= print_item
        print_item ::= expr PRINT_ITEM_CONT
        print_nl ::= PRINT_NEWLINE
        '''
    # --- print >>stream, ... statement ---
    def p_print_to(self, args):
        '''
        stmt ::= print_to
        stmt ::= print_to_nl
        stmt ::= print_nl_to
        print_to ::= expr print_to_items POP_TOP
        print_to_nl ::= expr print_to_items PRINT_NEWLINE_TO
        print_nl_to ::= expr PRINT_NEWLINE_TO
        print_to_items ::= print_to_items print_to_item
        print_to_items ::= print_to_item
        print_to_item ::= DUP_TOP expr ROT_TWO PRINT_ITEM_TO
        '''
    # --- import / from-import statements ---
    def p_import20(self, args):
        '''
        stmt ::= importstmt
        stmt ::= importfrom
        stmt ::= importstar
        stmt ::= importmultiple
        importlist2 ::= importlist2 import_as
        importlist2 ::= import_as
        import_as ::= IMPORT_NAME designator
        import_as ::= IMPORT_NAME LOAD_ATTR designator
        import_as ::= IMPORT_NAME LOAD_ATTR LOAD_ATTR designator
        import_as ::= IMPORT_NAME LOAD_ATTR LOAD_ATTR LOAD_ATTR designator
        import_as ::= IMPORT_FROM designator
        importstmt ::= LOAD_CONST LOAD_CONST import_as
        importstar ::= LOAD_CONST LOAD_CONST IMPORT_NAME IMPORT_STAR
        importfrom ::= LOAD_CONST LOAD_CONST IMPORT_NAME importlist2 POP_TOP
        importstar ::= LOAD_CONST LOAD_CONST IMPORT_NAME_CONT IMPORT_STAR
        importfrom ::= LOAD_CONST LOAD_CONST IMPORT_NAME_CONT importlist2 POP_TOP
        importmultiple ::= LOAD_CONST LOAD_CONST import_as imports_cont
        imports_cont ::= imports_cont import_cont
        imports_cont ::= import_cont
        import_cont ::= LOAD_CONST LOAD_CONST import_as_cont
        import_as_cont ::= IMPORT_NAME_CONT designator
        import_as_cont ::= IMPORT_NAME_CONT LOAD_ATTR designator
        import_as_cont ::= IMPORT_NAME_CONT LOAD_ATTR LOAD_ATTR designator
        import_as_cont ::= IMPORT_NAME_CONT LOAD_ATTR LOAD_ATTR LOAD_ATTR designator
        import_as_cont ::= IMPORT_FROM designator
        '''
    # --- Statement lists, control flow, try/except, loops, with, class ---
    def p_grammar(self, args):
        '''
        stmts ::= stmts sstmt
        stmts ::= sstmt
        sstmt ::= stmt
        sstmt ::= ifelsestmtr
        sstmt ::= return_stmt RETURN_LAST
        stmts_opt ::= stmts
        stmts_opt ::= passstmt
        passstmt ::=
        _stmts ::= _stmts stmt
        _stmts ::= stmt
        c_stmts ::= _stmts
        c_stmts ::= _stmts lastc_stmt
        c_stmts ::= lastc_stmt
        c_stmts ::= continue_stmts
        lastc_stmt ::= iflaststmt
        lastc_stmt ::= whileelselaststmt
        lastc_stmt ::= forelselaststmt
        lastc_stmt ::= ifelsestmtr
        lastc_stmt ::= ifelsestmtc
        lastc_stmt ::= tryelsestmtc
        c_stmts_opt ::= c_stmts
        c_stmts_opt ::= passstmt
        l_stmts ::= _stmts
        l_stmts ::= return_stmts
        l_stmts ::= continue_stmts
        l_stmts ::= _stmts lastl_stmt
        l_stmts ::= lastl_stmt
        lastl_stmt ::= iflaststmtl
        lastl_stmt ::= ifelsestmtl
        lastl_stmt ::= forelselaststmtl
        lastl_stmt ::= tryelsestmtl
        l_stmts_opt ::= l_stmts
        l_stmts_opt ::= passstmt
        suite_stmts ::= _stmts
        suite_stmts ::= return_stmts
        suite_stmts ::= continue_stmts
        suite_stmts_opt ::= suite_stmts
        suite_stmts_opt ::= passstmt
        else_suite ::= suite_stmts
        else_suitel ::= l_stmts
        else_suitec ::= c_stmts
        else_suitec ::= return_stmts
        designList ::= designator designator
        designList ::= designator DUP_TOP designList
        designator ::= STORE_FAST
        designator ::= STORE_NAME
        designator ::= STORE_GLOBAL
        designator ::= STORE_DEREF
        designator ::= expr STORE_ATTR
        designator ::= expr STORE_SLICE+0
        designator ::= expr expr STORE_SLICE+1
        designator ::= expr expr STORE_SLICE+2
        designator ::= expr expr expr STORE_SLICE+3
        designator ::= store_subscr
        store_subscr ::= expr expr STORE_SUBSCR
        designator ::= unpack
        designator ::= unpack_list
        stmt ::= classdef
        stmt ::= call_stmt
        call_stmt ::= expr POP_TOP
        stmt ::= return_stmt
        return_stmt ::= expr RETURN_VALUE
        return_stmts ::= return_stmt
        return_stmts ::= _stmts return_stmt
        return_if_stmts ::= return_if_stmt
        return_if_stmts ::= _stmts return_if_stmt
        return_if_stmt ::= expr RETURN_END_IF
        stmt ::= break_stmt
        break_stmt ::= BREAK_LOOP
        stmt ::= continue_stmt
        continue_stmt ::= CONTINUE
        continue_stmt ::= CONTINUE_LOOP
        continue_stmts ::= _stmts lastl_stmt continue_stmt
        continue_stmts ::= lastl_stmt continue_stmt
        continue_stmts ::= continue_stmt
        stmt ::= raise_stmt
        raise_stmt ::= exprlist RAISE_VARARGS
        raise_stmt ::= nullexprlist RAISE_VARARGS
        stmt ::= exec_stmt
        exec_stmt ::= expr exprlist DUP_TOP EXEC_STMT
        exec_stmt ::= expr exprlist EXEC_STMT
        stmt ::= assert
        stmt ::= assert2
        stmt ::= ifstmt
        stmt ::= ifelsestmt
        stmt ::= whilestmt
        stmt ::= whilenotstmt
        stmt ::= while1stmt
        stmt ::= whileelsestmt
        stmt ::= while1elsestmt
        stmt ::= forstmt
        stmt ::= forelsestmt
        stmt ::= trystmt
        stmt ::= tryelsestmt
        stmt ::= tryfinallystmt
        stmt ::= withstmt
        stmt ::= withasstmt
        stmt ::= del_stmt
        del_stmt ::= DELETE_FAST
        del_stmt ::= DELETE_NAME
        del_stmt ::= DELETE_GLOBAL
        del_stmt ::= expr DELETE_SLICE+0
        del_stmt ::= expr expr DELETE_SLICE+1
        del_stmt ::= expr expr DELETE_SLICE+2
        del_stmt ::= expr expr expr DELETE_SLICE+3
        del_stmt ::= delete_subscr
        delete_subscr ::= expr expr DELETE_SUBSCR
        del_stmt ::= expr DELETE_ATTR
        kwarg ::= LOAD_CONST expr
        classdef ::= LOAD_CONST expr mkfunc
                CALL_FUNCTION_0 BUILD_CLASS designator
        stmt ::= classdefdeco
        classdefdeco ::= classdefdeco1 designator
        classdefdeco1 ::= expr classdefdeco1 CALL_FUNCTION_1
        classdefdeco1 ::= expr classdefdeco2 CALL_FUNCTION_1
        classdefdeco2 ::= LOAD_CONST expr mkfunc CALL_FUNCTION_0 BUILD_CLASS
        _jump ::= JUMP_ABSOLUTE
        _jump ::= JUMP_FORWARD
        _jump ::= JUMP_BACK
        jmp_false ::= POP_JUMP_IF_FALSE
        jmp_false ::= JUMP_IF_FALSE
        jmp_true ::= POP_JUMP_IF_TRUE
        jmp_true ::= JUMP_IF_TRUE
        multi_come_from ::= multi_come_from COME_FROM
        multi_come_from ::=
        assert_end ::= multi_come_from POP_TOP
        assert_end ::=
        assert ::= assert_expr jmp_true
                LOAD_ASSERT RAISE_VARARGS assert_end
        assert2 ::= assert_expr jmp_true
                LOAD_ASSERT expr RAISE_VARARGS assert_end
        assert ::= assert_expr jmp_true
                LOAD_GLOBAL RAISE_VARARGS assert_end
        assert2 ::= assert_expr jmp_true
                LOAD_GLOBAL expr RAISE_VARARGS assert_end
        assert_expr ::= assert_expr_or
        assert_expr ::= assert_expr_and
        assert_expr ::= expr
        assert_expr_or ::= assert_expr jmp_true expr
        assert_expr_and ::= assert_expr jmp_false expr
        ifstmt ::= testexpr _ifstmts_jump
        testexpr ::= testfalse
        testexpr ::= testtrue
        testfalse ::= expr jmp_false
        testtrue ::= expr jmp_true
        _ifstmts_jump ::= return_if_stmts
        _ifstmts_jump ::= c_stmts_opt JUMP_FORWARD COME_FROM
        iflaststmt ::= testexpr c_stmts_opt JUMP_ABSOLUTE
        iflaststmtl ::= testexpr c_stmts_opt JUMP_BACK
        ifelsestmt ::= testexpr c_stmts_opt JUMP_FORWARD else_suite COME_FROM
        ifelsestmtc ::= testexpr c_stmts_opt JUMP_ABSOLUTE else_suitec
        ifelsestmtr ::= testexpr return_if_stmts return_stmts
        ifelsestmtl ::= testexpr c_stmts_opt JUMP_BACK else_suitel
        trystmt ::= SETUP_EXCEPT suite_stmts_opt POP_BLOCK
                try_middle COME_FROM
        tryelsestmt ::= SETUP_EXCEPT suite_stmts_opt POP_BLOCK
                try_middle else_suite COME_FROM
        tryelsestmtc ::= SETUP_EXCEPT suite_stmts_opt POP_BLOCK
                try_middle else_suitec COME_FROM
        tryelsestmtl ::= SETUP_EXCEPT suite_stmts_opt POP_BLOCK
                try_middle else_suitel COME_FROM
        try_middle ::= jmp_abs COME_FROM except_stmts
                END_FINALLY
        try_middle ::= JUMP_FORWARD COME_FROM except_stmts
                END_FINALLY COME_FROM
        except_stmts ::= except_stmts except_stmt
        except_stmts ::= except_stmt
        except_stmt ::= except_cond1 except_suite
        except_stmt ::= except_cond2 except_suite
        except_stmt ::= except
        except_suite ::= c_stmts_opt JUMP_FORWARD
        except_suite ::= c_stmts_opt jmp_abs
        except_suite ::= return_stmts
        except_cond1 ::= DUP_TOP expr COMPARE_OP
                jmp_false POP_TOP POP_TOP POP_TOP
        except_cond2 ::= DUP_TOP expr COMPARE_OP
                jmp_false POP_TOP designator POP_TOP
        except  ::=  POP_TOP POP_TOP POP_TOP c_stmts_opt JUMP_FORWARD
        except  ::=  POP_TOP POP_TOP POP_TOP c_stmts_opt jmp_abs
        except  ::=  POP_TOP POP_TOP POP_TOP return_stmts
        jmp_abs ::= JUMP_ABSOLUTE
        jmp_abs ::= JUMP_BACK
        tryfinallystmt ::= SETUP_FINALLY suite_stmts
                POP_BLOCK LOAD_CONST
                COME_FROM suite_stmts_opt END_FINALLY
        withstmt ::= expr SETUP_WITH POP_TOP suite_stmts_opt
                POP_BLOCK LOAD_CONST COME_FROM
                WITH_CLEANUP END_FINALLY
        withasstmt ::= expr SETUP_WITH designator suite_stmts_opt
                POP_BLOCK LOAD_CONST COME_FROM
                WITH_CLEANUP END_FINALLY
        whilestmt ::= SETUP_LOOP
                testexpr
                l_stmts_opt JUMP_BACK
                POP_BLOCK COME_FROM
        whilestmt ::= SETUP_LOOP
                testexpr
                return_stmts
                POP_BLOCK COME_FROM
        while1stmt ::= SETUP_LOOP l_stmts JUMP_BACK COME_FROM
        while1stmt ::= SETUP_LOOP return_stmts COME_FROM
        whileelsestmt ::= SETUP_LOOP testexpr
                l_stmts_opt JUMP_BACK
                POP_BLOCK
                else_suite COME_FROM
        whileelselaststmt ::= SETUP_LOOP testexpr
                l_stmts_opt JUMP_BACK
                POP_BLOCK
                else_suitec COME_FROM
        _for ::= GET_ITER FOR_ITER
        _for ::= LOAD_CONST FOR_LOOP
        for_block ::= l_stmts_opt JUMP_BACK
        for_block ::= return_stmts _come_from
        forstmt ::= SETUP_LOOP expr _for designator
                for_block POP_BLOCK COME_FROM
        forelsestmt ::= SETUP_LOOP expr _for designator
                for_block POP_BLOCK else_suite COME_FROM
        forelselaststmt ::= SETUP_LOOP expr _for designator
                for_block POP_BLOCK else_suitec COME_FROM
        forelselaststmtl ::= SETUP_LOOP expr _for designator
                for_block POP_BLOCK else_suitel COME_FROM
        '''
    # --- Expressions: literals, operators, slices, comparisons, dicts ---
    def p_expr(self, args):
        '''
        expr ::= _mklambda
        expr ::= SET_LINENO
        expr ::= LOAD_FAST
        expr ::= LOAD_NAME
        expr ::= LOAD_CONST
        expr ::= LOAD_ASSERT
        expr ::= LOAD_GLOBAL
        expr ::= LOAD_DEREF
        expr ::= LOAD_LOCALS
        expr ::= load_attr
        expr ::= binary_expr
        expr ::= binary_expr_na
        expr ::= build_list
        expr ::= cmp
        expr ::= mapexpr
        expr ::= and
        expr ::= and2
        expr ::= or
        expr ::= unary_expr
        expr ::= call_function
        expr ::= unary_not
        expr ::= unary_convert
        expr ::= binary_subscr
        expr ::= binary_subscr2
        expr ::= load_attr
        expr ::= get_iter
        expr ::= slice0
        expr ::= slice1
        expr ::= slice2
        expr ::= slice3
        expr ::= buildslice2
        expr ::= buildslice3
        expr ::= yield
        binary_expr ::= expr expr binary_op
        binary_op ::= BINARY_ADD
        binary_op ::= BINARY_MULTIPLY
        binary_op ::= BINARY_AND
        binary_op ::= BINARY_OR
        binary_op ::= BINARY_XOR
        binary_op ::= BINARY_SUBTRACT
        binary_op ::= BINARY_DIVIDE
        binary_op ::= BINARY_TRUE_DIVIDE
        binary_op ::= BINARY_FLOOR_DIVIDE
        binary_op ::= BINARY_MODULO
        binary_op ::= BINARY_LSHIFT
        binary_op ::= BINARY_RSHIFT
        binary_op ::= BINARY_POWER
        unary_expr ::= expr unary_op
        unary_op ::= UNARY_POSITIVE
        unary_op ::= UNARY_NEGATIVE
        unary_op ::= UNARY_INVERT
        unary_not ::= expr UNARY_NOT
        unary_convert ::= expr UNARY_CONVERT
        binary_subscr ::= expr expr BINARY_SUBSCR
        binary_subscr2 ::= expr expr DUP_TOPX_2 BINARY_SUBSCR
        load_attr ::= expr LOAD_ATTR
        get_iter ::= expr GET_ITER
        slice0 ::= expr SLICE+0
        slice0 ::= expr DUP_TOP SLICE+0
        slice1 ::= expr expr SLICE+1
        slice1 ::= expr expr DUP_TOPX_2 SLICE+1
        slice2 ::= expr expr SLICE+2
        slice2 ::= expr expr DUP_TOPX_2 SLICE+2
        slice3 ::= expr expr expr SLICE+3
        slice3 ::= expr expr expr DUP_TOPX_3 SLICE+3
        buildslice3 ::= expr expr expr BUILD_SLICE_3
        buildslice2 ::= expr expr BUILD_SLICE_2
        yield ::= expr YIELD_VALUE
        _mklambda ::= load_closure mklambda
        _mklambda ::= mklambda
        or   ::= expr jmp_true expr _come_from
        or   ::= expr JUMP_IF_TRUE_OR_POP expr COME_FROM
        and  ::= expr jmp_false expr _come_from
        and  ::= expr JUMP_IF_FALSE_OR_POP expr COME_FROM
        and2 ::= _jump jmp_false COME_FROM expr COME_FROM
        expr ::= conditional
        conditional ::= expr jmp_false expr JUMP_FORWARD expr COME_FROM
        conditional ::= expr jmp_false expr JUMP_ABSOLUTE expr
        expr ::= conditionalnot
        conditionalnot ::= expr jmp_true expr _jump expr COME_FROM
        stmt ::= return_lambda
        stmt ::= conditional_lambda
        stmt ::= conditional_lambda2
        return_lambda ::= expr RETURN_VALUE LAMBDA_MARKER
        conditional_lambda ::= expr jmp_false return_if_stmt return_stmt LAMBDA_MARKER
        cmp ::= cmp_list
        cmp ::= compare
        compare ::= expr expr COMPARE_OP
        cmp_list ::= expr cmp_list1 ROT_TWO POP_TOP
                _come_from
        cmp_list1 ::= expr DUP_TOP ROT_THREE
                COMPARE_OP JUMP_IF_FALSE_OR_POP
                cmp_list1 COME_FROM
        cmp_list1 ::= expr DUP_TOP ROT_THREE
                COMPARE_OP jmp_false
                cmp_list1 _come_from
        cmp_list1 ::= expr DUP_TOP ROT_THREE
                COMPARE_OP JUMP_IF_FALSE_OR_POP
                cmp_list2 COME_FROM
        cmp_list1 ::= expr DUP_TOP ROT_THREE
                COMPARE_OP jmp_false
                cmp_list2 _come_from
        cmp_list2 ::= expr COMPARE_OP JUMP_FORWARD
        cmp_list2 ::= expr COMPARE_OP RETURN_VALUE
        mapexpr ::= BUILD_MAP kvlist
        kvlist ::= kvlist kv
        kvlist ::= kvlist kv2
        kvlist ::= kvlist kv3
        kvlist ::=
        kv ::= DUP_TOP expr ROT_TWO expr STORE_SUBSCR
        kv2 ::= DUP_TOP expr expr ROT_THREE STORE_SUBSCR
        kv3 ::= expr expr STORE_MAP
        exprlist ::= exprlist expr
        exprlist ::= expr
        nullexprlist ::=
        '''
    def nonterminal(self, nt, args):
        # spark hook: for list-like nonterminals, flatten the left-recursive
        # chain into one node instead of a deeply nested tree.
        collect = ('stmts', 'exprlist', 'kvlist', '_stmts', 'print_items')
        if nt in collect and len(args) > 1:
            #
            #  Collect iterated thingies together.
            #
            rv = args[0]
            rv.append(args[1])
        else:
            rv = GenericASTBuilder.nonterminal(self, nt, args)
        return rv
    def __ambiguity(self, children):
        # only for debugging! to be removed hG/2000-10-15
        print children
        return GenericASTBuilder.ambiguity(self, children)
    def resolve(self, list):
        # spark hook: break rule ambiguities.  Prefer 'funcdef' over a plain
        # 'assign', and 'expr' over 'grammar'.
        # NOTE(review): parameter name shadows the builtin `list`; kept for
        # interface compatibility with spark's resolve() signature.
        if len(list) == 2 and 'funcdef' in list and 'assign' in list:
            return 'funcdef'
        if 'grammar' in list and 'expr' in list:
            return 'expr'
        #print >> sys.stderr, 'resolve', str(list)
        return GenericASTBuilder.resolve(self, list)
# Callback for dynamically added grammar rules: build no extra AST structure.
nop = lambda self, args: None

# Single module-level parser instance, shared (and incrementally customized
# via Parser.customized) across all calls to parse().
p = Parser()
def parse(tokens, customize):
    """
    Parse a token stream into an AST.

    tokens    -- list of Token objects produced by the scanner
    customize -- mapping of specialized opcode name (e.g. 'CALL_FUNCTION_2')
                 to its oparg, as produced by Scanner.disassemble(); a
                 grammar rule is added on the fly for each one.

    Returns the root AST node.  Raises ParserError on a syntax error and
    Exception on an unknown customize token.
    """
    #
    #  Special handling for opcodes that take a variable number
    #  of arguments -- we add a new rule for each:
    #
    #    expr ::= {expr}^n BUILD_LIST_n
    #    expr ::= {expr}^n BUILD_TUPLE_n
    #    unpack_list ::= UNPACK_LIST {expr}^n
    #    unpack ::= UNPACK_TUPLE {expr}^n
    #    unpack ::= UNPACK_SEQEUENE {expr}^n
    #    mkfunc ::= {expr}^n LOAD_CONST MAKE_FUNCTION_n
    #    mkfunc ::= {expr}^n load_closure LOAD_CONST MAKE_FUNCTION_n
    #    expr ::= expr {expr}^n CALL_FUNCTION_n
    #    expr ::= expr {expr}^n CALL_FUNCTION_VAR_n POP_TOP
    #    expr ::= expr {expr}^n CALL_FUNCTION_VAR_KW_n POP_TOP
    #    expr ::= expr {expr}^n CALL_FUNCTION_KW_n POP_TOP
    #
    global p
    for k, v in customize.items():
        # avoid adding the same rule twice to this parser
        # (was p.customized.has_key(k): deprecated dict method)
        if k in p.customized:
            continue
        p.customized[k] = None
        # strip the trailing '_<arity>' to recover the base opcode name
        # (was string.rfind(k, '_'): deprecated string-module function)
        op = k[:k.rfind('_')]
        if op in ('BUILD_LIST', 'BUILD_TUPLE', 'BUILD_SET'):
            rule = 'build_list ::= ' + 'expr '*v + k
        elif op in ('UNPACK_TUPLE', 'UNPACK_SEQUENCE'):
            rule = 'unpack ::= ' + k + ' designator'*v
        elif op == 'UNPACK_LIST':
            rule = 'unpack_list ::= ' + k + ' designator'*v
        elif op == 'DUP_TOPX':
            # no need to add a rule
            continue
            #rule = 'dup_topx ::= ' + 'expr '*v + k
        elif op == 'MAKE_FUNCTION':
            p.addRule('mklambda ::= %s LOAD_LAMBDA %s' %
                  ('expr '*v, k), nop)
            rule = 'mkfunc ::= %s LOAD_CONST %s' % ('expr '*v, k)
        elif op == 'MAKE_CLOSURE':
            # closures feed lambdas, genexprs and comprehensions as well
            p.addRule('mklambda ::= %s load_closure LOAD_LAMBDA %s' %
                  ('expr '*v, k), nop)
            p.addRule('genexpr ::= %s load_closure LOAD_GENEXPR %s expr GET_ITER CALL_FUNCTION_1' %
                  ('expr '*v, k), nop)
            p.addRule('setcomp ::= %s load_closure LOAD_SETCOMP %s expr GET_ITER CALL_FUNCTION_1' %
                  ('expr '*v, k), nop)
            p.addRule('dictcomp ::= %s load_closure LOAD_DICTCOMP %s expr GET_ITER CALL_FUNCTION_1' %
                  ('expr '*v, k), nop)
            rule = 'mkfunc ::= %s load_closure LOAD_CONST %s' % ('expr '*v, k)
#            rule = 'mkfunc ::= %s closure_list LOAD_CONST %s' % ('expr '*v, k)
        elif op in ('CALL_FUNCTION', 'CALL_FUNCTION_VAR',
                'CALL_FUNCTION_VAR_KW', 'CALL_FUNCTION_KW'):
            # oparg packs the counts: low byte positional, next byte keyword
            na = (v & 0xff)           # positional parameters
            nk = (v >> 8) & 0xff      # keyword parameters
            # number of apply equiv arguments, derived from the '_VAR'/'_KW'
            # suffix length (each suffix chunk is 3 or 4 chars wide);
            # explicit floor division (was '/', same result on int args)
            nak = ( len(op)-len('CALL_FUNCTION') ) // 3
            rule = 'call_function ::= expr ' + 'expr '*na + 'kwarg '*nk \
                   + 'expr ' * nak + k
        else:
            raise Exception('unknown customize token %s' % k)
        p.addRule(rule, nop)
    ast = p.parse(tokens)
#   p.cleanup()
    return ast

849
uncompyle2/Scanner.py Executable file
View File

@@ -0,0 +1,849 @@
# Copyright (c) 1999 John Aycock
# Copyright (c) 2000-2002 by hartmut Goebel <hartmut@goebel.noris.de>
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
#
# See main module for license.
#
__all__ = ['Token', 'Scanner', 'getscanner']
import types
import disas as dis
from collections import namedtuple
from array import array
from operator import itemgetter
class Token:
    """
    One disassembled byte-code instruction.

    Roughly equivalent to a single line of dis.dis() output: a type name
    (interned for fast comparison), the raw and pretty-printed arguments,
    the byte offset, and whether the instruction starts a source line.
    """
    def __init__(self, type_, attr=None, pattr=None, offset=-1, linestart=False):
        self.type = intern(type_)
        self.attr = attr
        self.pattr = pattr
        self.offset = offset
        self.linestart = linestart
    def __cmp__(self, o):
        if not isinstance(o, Token):
            # against a plain string, order by token type alone
            return cmp(self.type, o)
        # between two tokens: type is primary key, pattr breaks ties
        return cmp(self.type, o.type) or cmp(self.pattr, o.pattr)
    def __repr__(self):
        return str(self.type)
    def __str__(self):
        # a leading newline visually separates source-line boundaries
        prefix = '\n' if self.linestart else ''
        return prefix + '%s\t%-17s %r' % (self.offset, self.type, self.pattr)
    def __hash__(self):
        return hash(self.type)
    def __getitem__(self, i):
        # Tokens are leaves -- never indexable like AST nodes.
        raise IndexError
class Code:
    """
    Lightweight stand-in for a real code object.

    Every ``co_*`` attribute of the wrapped code object is copied onto the
    instance, and the disassembly produced by the scanner is kept in the
    extra attributes ``_tokens`` and ``_customize``.
    """
    def __init__(self, co, scanner, classname=None):
        # mirror all co_* attributes of the original code object
        co_attrs = [name for name in dir(co) if name.startswith('co_')]
        for name in co_attrs:
            setattr(self, name, getattr(co, name))
        # classname (if given) lets the scanner unmangle private names
        self._tokens, self._customize = scanner.disassemble(co, classname)
class Scanner:
def __init__(self, version):
    # `version` is the byte-code version being decompiled ('2.5'..'2.7');
    # the scanner itself must run under Python 2.7.
    self.version = version
    self.resetTokenClass()
    # Switch the bundled `disas` module to the opcode tables for `version`.
    dis.setVersion(version)
    # Inject HAVE_ARGUMENT and every opcode value into this module's
    # globals, mangling '+' to '_' (e.g. SLICE+0 -> SLICE_0) so the rest
    # of the scanner can refer to opcodes as bare names.
    globals().update({'HAVE_ARGUMENT': dis.HAVE_ARGUMENT})
    globals().update({k.replace('+','_'):v for (k,v) in dis.opmap.items()})
    # Short aliases for the most frequently tested jump opcodes.
    globals().update({'PJIF': dis.opmap['POP_JUMP_IF_FALSE']})
    globals().update({'PJIT': dis.opmap['POP_JUMP_IF_TRUE']})
    globals().update({'JA': dis.opmap['JUMP_ABSOLUTE']})
    globals().update({'JF': dis.opmap['JUMP_FORWARD']})
    # Names of every relative and absolute jump opcode.
    self.JUMP_OPs = map(lambda op: dis.opname[op],
                        dis.hasjrel + dis.hasjabs)
def setShowAsm(self, showasm, out=None):
    """Enable or disable disassembly dumping; `out` is the stream dumped
    to (None means sys.stdout via the print statement's default)."""
    self.showasm, self.out = showasm, out
def setTokenClass(self, tokenClass):
    # Install the class used to build tokens during disassembly.
    # NOTE(review): types.ClassType accepts only old-style (Python 2)
    # classes; a new-style class would fail this assert -- presumably
    # intentional, since Token is old-style.  Confirm before changing.
    assert type(tokenClass) == types.ClassType
    self.Token = tokenClass
def resetTokenClass(self):
    # Restore the default Token implementation defined in this module.
    self.setTokenClass(Token)
def disassemble(self, co, classname=None):
    """
    Disassemble a code object, returning a list of 'Token'.
    The main part of this procedure is modelled after
    dis.disassemble().

    co        -- the code object to disassemble
    classname -- when given, private names mangled with this class name
                 are unmangled in the emitted tokens

    Returns (tokens, customize) where customize maps specialized opcode
    names (e.g. 'CALL_FUNCTION_2') to their oparg, for grammar extension.
    """
    rv = []
    customize = {}
    Token = self.Token # shortcut
    self.code = code = array('B', co.co_code)
    n = len(code)
    self.prev = [0]
    # mapping adresses of instru & arg: self.prev[x] is the offset of the
    # instruction preceding byte x (argument bytes map back to their opcode)
    for i in self.op_range(0, n):
        c = code[i]
        op = code[i]
        self.prev.append(i)
        if op >= HAVE_ARGUMENT:
            # two argument bytes follow; both point back to this opcode
            self.prev.append(i)
            self.prev.append(i)
    self.lines = []
    linetuple = namedtuple('linetuple', ['l_no', 'next'])
    j = 0
    # linestarts contains bloc code adresse (addr,block)
    linestarts = list(dis.findlinestarts(co))
    linestartoffsets = {a for (a, _) in linestarts}
    (prev_start_byte, prev_line_no) = linestarts[0]
    # fill self.lines so that self.lines[offset] gives (source line number,
    # offset where the next source line begins)
    for (start_byte, line_no) in linestarts[1:]:
        while j < start_byte:
            self.lines.append(linetuple(prev_line_no, start_byte))
            j += 1
        last_op = code[self.prev[start_byte]]
        (prev_start_byte, prev_line_no) = (start_byte, line_no)
    while j < n:
        self.lines.append(linetuple(prev_line_no, n))
        j+=1
    # self.lines contains (block,addrLastInstr)
    cf = self.find_jump_targets(code)
    # contains (code, [addrRefToCode])
    if classname:
        # build the CPython private-name mangling prefix, then strip it
        classname = '_' + classname.lstrip('_') + '__'
        def unmangle(name):
            if name.startswith(classname) and name[-2:] != '__':
                return name[len(classname) - 2:]
            return name
        free = [ unmangle(name) for name in (co.co_cellvars + co.co_freevars) ]
        names = [ unmangle(name) for name in co.co_names ]
        varnames = [ unmangle(name) for name in co.co_varnames ]
    else:
        free = co.co_cellvars + co.co_freevars
        names = co.co_names
        varnames = co.co_varnames
    # Walk statement boundaries: PRINT_ITEM/PRINT_NEWLINE that continue a
    # print statement on the same source line get *_CONT token names so the
    # grammar can distinguish 'print a, b' from two print statements.
    last_stmt = self.next_stmt[0]
    i = self.next_stmt[last_stmt]
    replace = {}
    while i < n-1:
        if self.lines[last_stmt].next > i:
            if code[last_stmt] == PRINT_ITEM:
                if code[i] == PRINT_ITEM:
                    replace[i] = 'PRINT_ITEM_CONT'
                elif code[i] == PRINT_NEWLINE:
                    replace[i] = 'PRINT_NEWLINE_CONT'
        last_stmt = i
        i = self.next_stmt[i]
    # Similarly, a second IMPORT_NAME on the same line continues an
    # 'import a, b' statement.
    imports = self.all_instr(0, n, (IMPORT_NAME, IMPORT_FROM, IMPORT_STAR))
    if len(imports) > 1:
        last_import = imports[0]
        for i in imports[1:]:
            if self.lines[last_import].next > i:
                if code[last_import] == IMPORT_NAME == code[i]:
                    replace[i] = 'IMPORT_NAME_CONT'
            last_import = i
    # Main token-emission loop.
    extended_arg = 0
    for offset in self.op_range(0, n):
        if offset in cf:
            # emit one pseudo COME_FROM token per jump landing here
            k = 0
            for j in cf[offset]:
                rv.append(Token('COME_FROM', None, repr(j),
                                offset="%s_%d" % (offset, k) ))
                k += 1
        op = code[offset]
        opname = dis.opname[op]
        oparg = None; pattr = None
        if op >= HAVE_ARGUMENT:
            oparg = code[offset+1] + code[offset+2] * 256 + extended_arg
            extended_arg = 0
            if op == dis.EXTENDED_ARG:
                # folded into the next instruction's oparg; emit no token
                extended_arg = oparg * 65536L
                continue
            if op in dis.hasconst:
                const = co.co_consts[oparg]
                if type(const) == types.CodeType:
                    # nested code object: specialize the LOAD_CONST name so
                    # the grammar can tell lambdas/comprehensions apart
                    oparg = const
                    if const.co_name == '<lambda>':
                        assert opname == 'LOAD_CONST'
                        opname = 'LOAD_LAMBDA'
                    elif const.co_name == '<genexpr>':
                        opname = 'LOAD_GENEXPR'
                    elif const.co_name == '<dictcomp>':
                        opname = 'LOAD_DICTCOMP'
                    elif const.co_name == '<setcomp>':
                        opname = 'LOAD_SETCOMP'
                    # verify uses 'pattr' for comparism, since 'attr'
                    # now holds Code(const) and thus can not be used
                    # for comparism (todo: think about changing this)
                    #pattr = 'code_object @ 0x%x %s->%s' %\
                    #	(id(const), const.co_filename, const.co_name)
                    pattr = '<code_object ' + const.co_name + '>'
                else:
                    pattr = const
            elif op in dis.hasname:
                pattr = names[oparg]
            elif op in dis.hasjrel:
                pattr = repr(offset + 3 + oparg)
            elif op in dis.hasjabs:
                pattr = repr(oparg)
            elif op in dis.haslocal:
                pattr = varnames[oparg]
            elif op in dis.hascompare:
                pattr = dis.cmp_op[oparg]
            elif op in dis.hasfree:
                pattr = free[oparg]
        if op in (BUILD_LIST, BUILD_TUPLE, BUILD_SET, BUILD_SLICE,
                        UNPACK_SEQUENCE,
                        MAKE_FUNCTION, CALL_FUNCTION, MAKE_CLOSURE,
                        CALL_FUNCTION_VAR, CALL_FUNCTION_KW,
                        CALL_FUNCTION_VAR_KW, DUP_TOPX,
                        ):
            # CE - Hack for >= 2.5
            #      Now all values loaded via LOAD_CLOSURE are packed into
            #      a tuple before calling MAKE_CLOSURE.
            if op == BUILD_TUPLE and \
                code[offset-3] == LOAD_CLOSURE:
                continue
            else:
                # variable-arity opcode: append the arity to the name and
                # record it so parse() can add a matching grammar rule
                opname = '%s_%d' % (opname, oparg)
                if op != BUILD_SLICE:
                    customize[opname] = oparg
        elif op == JA:
            target = self.get_target(offset)
            if target < offset:
                # backward absolute jump: either a 'continue' or the
                # closing jump of a loop body
                if offset in self.stmts and code[offset+3] not in (END_FINALLY, POP_BLOCK) \
                 and offset not in self.not_continue:
                    opname = 'CONTINUE'
                else:
                    opname = 'JUMP_BACK'
        elif op == LOAD_GLOBAL:
            # LOAD_GLOBAL AssertionError right after a POP_JUMP_IF_TRUE
            # is the tell-tale of an assert statement
            try:
                if pattr == 'AssertionError' and rv and rv[-1] == 'POP_JUMP_IF_TRUE':
                    opname = 'LOAD_ASSERT'
            except AttributeError:
                pass
        elif op == RETURN_VALUE:
            if offset in self.return_end_ifs:
                opname = 'RETURN_END_IF'
        if offset not in replace:
            rv.append(Token(opname, oparg, pattr, offset, linestart = offset in linestartoffsets))
        else:
            rv.append(Token(replace[offset], oparg, pattr, offset, linestart = offset in linestartoffsets))
    if self.showasm:
        out = self.out # shortcut
        for t in rv:
            print >>out, t
        print >>out
    return rv, customize
def get_target(self, pos, op=None):
    """
    Decode the jump target of the instruction at byte offset `pos`.
    The 16-bit argument is the target itself for absolute jumps, or an
    offset past the instruction (pos + 3) for relative jumps.
    """
    code = self.code
    if op is None:
        op = code[pos]
    # little-endian 16-bit argument
    target = code[pos + 1] + code[pos + 2] * 256
    return target + pos + 3 if op in dis.hasjrel else target
def first_instr(self, start, end, instr, target=None, exact=True):
    """
    Find the first <instr> in the block from start to end.
    <instr> is any python bytecode instruction or a list of opcodes
    If <instr> is an opcode with a target (like a jump), a target
    destination can be specified which must match precisely if exact
    is True, or if exact is False, the instruction which has a target
    closest to <target> will be returned.
    Return index to it or None if not found.
    """
    code = self.code
    assert(start>=0 and end<=len(code))
    # Accept a single opcode as well as any container of opcodes.
    # Was a bare `except:`; only the TypeError raised by `in` on a
    # non-container is expected here.
    try: None in instr
    except TypeError: instr = [instr]
    pos = None
    distance = len(code)
    for i in self.op_range(start, end):
        op = code[i]
        if op in instr:
            if target is None:
                return i
            dest = self.get_target(i, op)
            if dest == target:
                return i
            elif not exact:
                # remember the match whose target is nearest so far
                _distance = abs(target - dest)
                if _distance < distance:
                    distance = _distance
                    pos = i
    return pos
def last_instr(self, start, end, instr, target=None, exact=True):
    """
    Find the last <instr> in the block from start to end.
    <instr> is any python bytecode instruction or a list of opcodes
    If <instr> is an opcode with a target (like a jump), a target
    destination can be specified which must match precisely if exact
    is True, or if exact is False, the instruction which has a target
    closest to <target> will be returned.
    Return index to it or None if not found.
    """
    code = self.code
    # unlike first_instr, out-of-range bounds just mean "not found"
    if not (start>=0 and end<=len(code)):
        return None
    # Accept a single opcode as well as any container of opcodes.
    # Was a bare `except:`; only the TypeError raised by `in` on a
    # non-container is expected here.
    try: None in instr
    except TypeError: instr = [instr]
    pos = None
    distance = len(code)
    for i in self.op_range(start, end):
        op = code[i]
        if op in instr:
            if target is None:
                # keep overwriting: the final value is the last match
                pos = i
            else:
                dest = self.get_target(i, op)
                if dest == target:
                    distance = 0
                    pos = i
                elif not exact:
                    # <= so later equally-near matches win (last wins)
                    _distance = abs(target - dest)
                    if _distance <= distance:
                        distance = _distance
                        pos = i
    return pos
def all_instr(self, start, end, instr, target=None, include_beyond_target=False):
    """
    Find all <instr> in the block from start to end.
    <instr> is any python bytecode instruction or a list of opcodes
    If <instr> is an opcode with a target (like a jump), a target
    destination can be specified which must match precisely (or, with
    include_beyond_target, any target at or beyond it).
    Return a list with indexes to them or [] if none found.
    """
    code = self.code
    assert(start>=0 and end<=len(code))
    # Accept a single opcode as well as any container of opcodes.
    # Was a bare `except:`; only the TypeError raised by `in` on a
    # non-container is expected here.
    try: None in instr
    except TypeError: instr = [instr]
    result = []
    for i in self.op_range(start, end):
        op = code[i]
        if op in instr:
            if target is None:
                result.append(i)
            else:
                t = self.get_target(i, op)
                if include_beyond_target and t >= target:
                    result.append(i)
                elif t == target:
                    result.append(i)
    return result
def op_size(self, op):
    """Byte width of an instruction: 1 for a bare opcode, 3 for an
    opcode followed by its 16-bit argument."""
    return 1 if op < HAVE_ARGUMENT else 3
def op_range(self, start, end):
    """Yield the byte offset of each instruction from `start` up to
    (but not including) `end`, stepping over argument bytes."""
    pos = start
    while pos < end:
        yield pos
        pos += self.op_size(self.code[pos])
def build_stmt_indices(self):
    """
    Compute the set of byte offsets that begin a statement.

    Fills self.stmts (set of statement-start offsets) and self.next_stmt
    (a list mapping every byte offset to the offset of the next
    statement start).
    """
    code = self.code
    start = 0;
    end = len(code)
    # opcodes that can only appear as (part of) a statement
    stmt_opcodes = {
        SETUP_LOOP, BREAK_LOOP, CONTINUE_LOOP,
        SETUP_FINALLY, END_FINALLY, SETUP_EXCEPT, SETUP_WITH,
        POP_BLOCK, STORE_FAST, DELETE_FAST, STORE_DEREF,
        STORE_GLOBAL, DELETE_GLOBAL, STORE_NAME, DELETE_NAME,
        STORE_ATTR, DELETE_ATTR, STORE_SUBSCR, DELETE_SUBSCR,
        RETURN_VALUE, RAISE_VARARGS, POP_TOP,
        PRINT_EXPR, PRINT_ITEM, PRINT_NEWLINE, PRINT_ITEM_TO, PRINT_NEWLINE_TO,
        STORE_SLICE_0, STORE_SLICE_1, STORE_SLICE_2, STORE_SLICE_3,
        DELETE_SLICE_0, DELETE_SLICE_1, DELETE_SLICE_2, DELETE_SLICE_3,
        JUMP_ABSOLUTE, EXEC_STMT,
    }
    # two-opcode sequences that mark an (otherwise invisible) pass/if stmt
    stmt_opcode_seqs = [(PJIF, JF), (PJIF, JA), (PJIT, JF), (PJIT, JA)]
    # opcodes that store into a target (used to detect for-loop targets)
    designator_ops = {
        STORE_FAST, STORE_NAME, STORE_GLOBAL, STORE_DEREF, STORE_ATTR,
        STORE_SLICE_0, STORE_SLICE_1, STORE_SLICE_2, STORE_SLICE_3,
        STORE_SUBSCR, UNPACK_SEQUENCE, JA
    }
    prelim = self.all_instr(start, end, stmt_opcodes)
    stmts = self.stmts = set(prelim)
    pass_stmts = set()
    # scan for the two-opcode sequences and add the offset of the
    # instruction preceding each match
    for seq in stmt_opcode_seqs:
        for i in self.op_range(start, end-(len(seq)+1)):
            match = True
            for elem in seq:
                if elem != code[i]:
                    match = False
                    break
                i += self.op_size(code[i])
            if match:
                i = self.prev[i]
                stmts.add(i)
                pass_stmts.add(i)
    if pass_stmts:
        stmt_list = list(stmts)
        stmt_list.sort()
    else:
        # nothing added: prelim is already sorted
        stmt_list = prelim
    last_stmt = -1
    self.next_stmt = []
    slist = self.next_stmt = []
    i = 0
    # Prune false statement starts, then build the next-statement table.
    for s in stmt_list:
        if code[s] == JA and s not in pass_stmts:
            target = self.get_target(s)
            if target > s or self.lines[last_stmt].l_no == self.lines[s].l_no:
                # forward jump, or same source line as previous statement:
                # not a real statement boundary
                stmts.remove(s)
                continue
            j = self.prev[s]
            while code[j] == JA:
                j = self.prev[j]
            if code[j] == LIST_APPEND: #list comprehension
                stmts.remove(s)
                continue
        elif code[s] == POP_TOP and code[self.prev[s]] == ROT_TWO:
            # part of a chained comparison, not a statement
            stmts.remove(s)
            continue
        elif code[s] in designator_ops:
            # a chain of stores fed by FOR_ITER is a loop target, not
            # a standalone assignment statement
            j = self.prev[s]
            while code[j] in designator_ops:
                j = self.prev[j]
            if code[j] == FOR_ITER:
                stmts.remove(s)
                continue
        last_stmt = s
        # every offset since the previous statement maps to this one
        slist += [s] * (s-i)
        i = s
    # offsets after the last statement map past the end of the code
    slist += [len(code)] * (len(code)-len(slist))
def remove_mid_line_ifs(self, ifs):
filtered = []
for i in ifs:
if self.lines[i].l_no == self.lines[i+3].l_no:
if self.code[self.prev[self.lines[i].next]] in (PJIT, PJIF):
continue
filtered.append(i)
return filtered
    def rem_or(self, start, end, instr, target=None, include_beyond_target=False):
        """
        Find all <instr> in the block from start to end.
        <instr> is any python bytecode instruction or a list of opcodes
        If <instr> is an opcode with a target (like a jump), a target
        destination can be specified which must match precisely.

        Return a list with indexes to them or [] if none found.

        After collecting, every PJIT in the range progressively prunes the
        result: only matches at or before the PJIT, or at/after the PJIT's
        target minus one instruction, survive (removes `or`-branch jumps).
        """
        code = self.code
        assert(start>=0 and end<=len(code))
        # accept a single opcode as well as a collection of them
        try:  None in instr
        except:  instr = [instr]
        result = []
        for i in self.op_range(start, end):
            op = code[i]
            if op in instr:
                if target is None:
                    result.append(i)
                else:
                    t = self.get_target(i, op)
                    if include_beyond_target and t >= target:
                        result.append(i)
                    elif t == target:
                        result.append(i)
        # prune matches falling strictly between a PJIT and its target
        pjits = self.all_instr(start, end, PJIT)
        filtered = []
        for pjit in pjits:
            tgt = self.get_target(pjit)-3
            for i in result:
                if i <= pjit or i >= tgt:
                    filtered.append(i)
            result = filtered
            filtered = []
        return result
    def next_except_jump(self, start):
        """
        Return the next jump that was generated by an except SomeException:
        construct in a try...except...else clause or None if not found.
        """
        # Fast path: a POP_JUMP_IF_FALSE on the starting line is the
        # exception-match test; the jump lands just past the handler.
        except_match = self.first_instr(start, self.lines[start].next, POP_JUMP_IF_FALSE)
        if except_match:
            jmp = self.prev[self.get_target(except_match)]
            self.ignore_if.add(except_match)
            return jmp

        # Otherwise scan forward for the END_FINALLY that closes this
        # handler, balancing any nested SETUP_* blocks on the way.
        count_END_FINALLY = 0
        count_SETUP_ = 0
        for i in self.op_range(start, len(self.code)):
            op = self.code[i]
            if op == END_FINALLY:
                if count_END_FINALLY == count_SETUP_:
                    # the instruction before END_FINALLY ends the handler
                    assert self.code[self.prev[i]] in (JA, JF, RETURN_VALUE)
                    return self.prev[i]
                count_END_FINALLY += 1
            elif op in (SETUP_EXCEPT, SETUP_WITH, SETUP_FINALLY):
                count_SETUP_ += 1
def restrict_to_parent(self, target, parent):
"""Restrict pos to parent boundaries."""
if not (parent['start'] < target < parent['end']):
target = parent['end']
return target
    def detect_structure(self, pos, op=None):
        """
        Detect type of block structures and their boundaries to fix optimizied jumps
        in python2.3+

        Works by side effect only: appends to self.structs and self.loops,
        and records offsets in self.fixed_jumps, self.ignore_if,
        self.not_continue and self.return_end_ifs.  Returns nothing.
        """
        # TODO: check the struct boundaries more precisely -Dan
        code = self.code
        # Ev remove this test and make op a mandatory argument -Dan
        if op is None:
            op = code[pos]
        ## Detect parent structure: the smallest known struct enclosing pos.
        parent = self.structs[0]
        start = parent['start']
        end = parent['end']
        for s in self.structs:
            _start = s['start']
            _end = s['end']
            if (_start <= pos < _end) and (_start >= start and _end <= end):
                start = _start
                end = _end
                parent = s
        ## We need to know how many new structures were added in this run
        origStructCount = len(self.structs)  # NOTE(review): never read below
        if op == SETUP_LOOP:
            #import pdb; pdb.set_trace()
            start = pos+3
            target = self.get_target(pos, op)
            end = self.restrict_to_parent(target, parent)
            if target != end:
                self.fixed_jumps[pos] = end
            (line_no, next_line_byte) = self.lines[pos]
            # the backward jump closing the loop body
            jump_back = self.last_instr(start, end, JA,
                                        next_line_byte, False)
            if not jump_back: # loop suite ends in return. wtf right?
                jump_back = self.last_instr(start, end, RETURN_VALUE) + 1
                if not jump_back:
                    return
                if code[self.prev[next_line_byte]] not in (PJIF, PJIT):
                    loop_type = 'for'
                else:
                    loop_type = 'while'
                    self.ignore_if.add(self.prev[next_line_byte])
                target = next_line_byte
                end = jump_back + 3
            else:
                if self.get_target(jump_back) >= next_line_byte:
                    jump_back = self.last_instr(start, end, JA,
                                                start, False)
                if end > jump_back+4 and code[end] in (JF, JA):
                    if code[jump_back+4] in (JA, JF):
                        if self.get_target(jump_back+4) == self.get_target(end):
                            self.fixed_jumps[pos] = jump_back+4
                            end = jump_back+4
                elif target < pos:
                    self.fixed_jumps[pos] = jump_back+4
                    end = jump_back+4
                target = self.get_target(jump_back, JA)
                # the loop head tells the kind of loop
                if code[target] in (FOR_ITER, GET_ITER):
                    loop_type = 'for'
                else:
                    loop_type = 'while'
                    test = self.prev[next_line_byte]
                    if test == pos:
                        loop_type = 'while 1'
                    else:
                        self.ignore_if.add(test)
                        test_target = self.get_target(test)
                        if test_target > (jump_back+3):
                            jump_back = test_target
                self.loops.append(target)
            self.structs.append({'type': loop_type + '-loop',
                                 'start': target,
                                 'end': jump_back})
            if jump_back+3 != end:
                self.structs.append({'type': loop_type + '-else',
                                     'start': jump_back+3,
                                     'end': end})
        elif op == SETUP_EXCEPT:
            start = pos+3
            target = self.get_target(pos, op)
            end = self.restrict_to_parent(target, parent)
            if target != end:
                self.fixed_jumps[pos] = end
            #print target, end, parent
            ## Add the try block
            self.structs.append({'type': 'try',
                                 'start': start,
                                 'end': end-4})
            ## Now isolate the except and else blocks
            end_else = start_else = self.get_target(self.prev[end])
            ## Add the except blocks
            i = end
            while self.code[i] != END_FINALLY:
                jmp = self.next_except_jump(i)
                if self.code[jmp] == RETURN_VALUE:
                    self.structs.append({'type': 'except',
                                         'start': i,
                                         'end': jmp+1})
                    i = jmp + 1
                else:
                    if self.get_target(jmp) != start_else:
                        end_else = self.get_target(jmp)
                    if self.code[jmp] == JF:
                        self.fixed_jumps[jmp] = -1
                    self.structs.append({'type': 'except',
                                         'start': i,
                                         'end': jmp})
                    i = jmp + 3
            ## Add the try-else block
            if end_else != start_else:
                r_end_else = self.restrict_to_parent(end_else, parent)
                self.structs.append({'type': 'try-else',
                                     'start': i+1,
                                     'end': r_end_else})
                self.fixed_jumps[i] = r_end_else
            else:
                self.fixed_jumps[i] = i+1
        elif op in (PJIF, PJIT):
            start = pos+3
            target = self.get_target(pos, op)
            rtarget = self.restrict_to_parent(target, parent)
            pre = self.prev
            if target != rtarget and parent['type'] == 'and/or':
                self.fixed_jumps[pos] = rtarget
                return
            #does this jump to right after another cond jump?
            # if so, it's part of a larger conditional
            if (code[pre[target]] in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP,
                    PJIF, PJIT)) and (target > pos):
                self.fixed_jumps[pos] = pre[target]
                self.structs.append({'type': 'and/or',
                                     'start': start,
                                     'end': pre[target]})
                return
            # is this an if and
            if op == PJIF:
                match = self.rem_or(start, self.next_stmt[pos], PJIF, target)
                match = self.remove_mid_line_ifs(match)
                if match:
                    if code[pre[rtarget]] in (JF, JA) \
                            and pre[rtarget] not in self.stmts \
                            and self.restrict_to_parent(self.get_target(pre[rtarget]), parent) == rtarget:
                        if code[pre[pre[rtarget]]] == JA \
                                and self.remove_mid_line_ifs([pos]) \
                                and target == self.get_target(pre[pre[rtarget]]) \
                                and (pre[pre[rtarget]] not in self.stmts or self.get_target(pre[pre[rtarget]]) > pre[pre[rtarget]])\
                                and 1 == len(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], (PJIF, PJIT), target))):
                            pass
                        elif code[pre[pre[rtarget]]] == RETURN_VALUE \
                                and self.remove_mid_line_ifs([pos]) \
                                and 1 == (len(set(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], \
                                                             (PJIF, PJIT), target))) \
                                          | set(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], \
                                                           (PJIF, PJIT, JA), pre[rtarget], True))))):
                            pass
                        else:
                            # find the innermost jump-if sharing our target
                            fix = None
                            jump_ifs = self.all_instr(start, self.next_stmt[pos], PJIF)
                            last_jump_good = True
                            for j in jump_ifs:
                                if target == self.get_target(j):
                                    if self.lines[j].next == j+3 and last_jump_good:
                                        fix = j
                                        break
                                else:
                                    last_jump_good = False
                            self.fixed_jumps[pos] = fix or match[-1]
                            return
                    else:
                        self.fixed_jumps[pos] = match[-1]
                        return
            else: # op == PJIT
                next = self.next_stmt[pos]
                if pre[next] == pos:
                    pass
                elif code[next] in (JF, JA) and target == self.get_target(next):
                    if code[pre[next]] == PJIF:
                        if code[next] == JF or target != rtarget or code[pre[pre[rtarget]]] not in (JA, RETURN_VALUE):
                            self.fixed_jumps[pos] = pre[next]
                            return
                elif code[next] == JA and code[target] in (JA, JF) \
                        and self.get_target(target) == self.get_target(next):
                    self.fixed_jumps[pos] = pre[next]
                    return
            #don't add a struct for a while test, it's already taken care of
            if pos in self.ignore_if:
                return
            if code[pre[rtarget]] == JA and pre[rtarget] in self.stmts \
                    and pre[rtarget] != pos and pre[pre[rtarget]] != pos \
                    and not (code[rtarget] == JA and code[rtarget+3] == POP_BLOCK and code[pre[pre[rtarget]]] != JA):
                rtarget = pre[rtarget]
            #does the if jump just beyond a jump op, then this is probably an if statement
            if code[pre[rtarget]] in (JA, JF):
                if_end = self.get_target(pre[rtarget])
                #is this a loop not an if?
                if (if_end < pre[rtarget]) and (code[pre[if_end]] == SETUP_LOOP):
                    if(if_end > start):
                        return
                end = self.restrict_to_parent(if_end, parent)
                self.structs.append({'type': 'if-then',
                                     'start': start,
                                     'end': pre[rtarget]})
                self.not_continue.add(pre[rtarget])
                if rtarget < end:
                    self.structs.append({'type': 'if-else',
                                         'start': rtarget,
                                         'end': end})
            elif code[pre[rtarget]] == RETURN_VALUE:
                self.structs.append({'type': 'if-then',
                                     'start': start,
                                     'end': rtarget})
                self.return_end_ifs.add(pre[rtarget])
        elif op in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP):
            target = self.get_target(pos, op)
            if target > pos:
                unop_target = self.last_instr(pos, target, JF, target)
                if unop_target and code[unop_target+3] != ROT_TWO:
                    self.fixed_jumps[pos] = unop_target
                else:
                    self.fixed_jumps[pos] = self.restrict_to_parent(target, parent)
    def find_jump_targets(self, code):
        """
        Detect all offsets in a byte code which are jump targets.

        Return the list of offsets.

        This procedure is modelled after dis.findlables(), but here
        for each target the number of jumps are counted.

        Also (re)initialises the per-disassembly analysis state
        (self.structs, self.loops, self.fixed_jumps, self.ignore_if,
        self.stmts/next_stmt, self.not_continue, self.return_end_ifs)
        and runs detect_structure() over every instruction.
        """
        hasjrel = dis.hasjrel
        hasjabs = dis.hasjabs
        n = len(code)
        self.structs = [{'type': 'root',
                         'start': 0,
                         'end': n-1}]
        self.loops = [] ## All loop entry points
        self.fixed_jumps = {} ## Map fixed jumps to their real destination
        self.ignore_if = set()
        self.build_stmt_indices()
        self.not_continue = set()
        self.return_end_ifs = set()
        targets = {}
        for i in self.op_range(0, n):
            op = code[i]
            ## Determine structures and fix jumps for 2.3+
            self.detect_structure(i, op)
            if op >= HAVE_ARGUMENT:
                label = self.fixed_jumps.get(i)
                oparg = code[i+1] + code[i+2] * 256
                if label is None:
                    # FOR_ITER targets are handled by the loop detection
                    if op in hasjrel and op != FOR_ITER:
                        label = i + 3 + oparg
                    elif op in hasjabs:
                        if op in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP):
                            if (oparg > i):
                                label = oparg
                # a fixed jump of -1 means "suppress this target"
                if label is not None and label != -1:
                    targets[label] = targets.get(label, []) + [i]
            elif op == END_FINALLY and i in self.fixed_jumps:
                label = self.fixed_jumps[i]
                targets[label] = targets.get(label, []) + [i]
        return targets

945
uncompyle2/Scanner25.py Executable file
View File

@@ -0,0 +1,945 @@
# Copyright (c) 1999 John Aycock
# Copyright (c) 2000-2002 by hartmut Goebel <hartmut@goebel.noris.de>
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
#
# See main module for license.
#
__all__ = ['Token', 'Scanner', 'getscanner']
import types
import disas as dis
from collections import namedtuple
from array import array
from operator import itemgetter
from struct import *
from Scanner import Token, Code
class Scanner:
def __init__(self, version):
self.version = version
self.resetTokenClass()
dis.setVersion(version)
globals().update({'HAVE_ARGUMENT': dis.HAVE_ARGUMENT})
globals().update({k.replace('+','_'):v for (k,v) in dis.opmap.items()})
globals().update({'PJIF': dis.opmap['JUMP_IF_FALSE']})
globals().update({'PJIT': dis.opmap['JUMP_IF_TRUE']})
globals().update({'JA': dis.opmap['JUMP_ABSOLUTE']})
globals().update({'JF': dis.opmap['JUMP_FORWARD']})
self.JUMP_OPs = map(lambda op: dis.opname[op],
dis.hasjrel + dis.hasjabs)
def setShowAsm(self, showasm, out=None):
self.showasm = showasm
self.out = out
    def setTokenClass(self, tokenClass):
        """Install *tokenClass* as the class used to build tokens.

        Must be an old-style (classic) class; the exact ``type(...) ==
        types.ClassType`` check rejects new-style classes as well.
        """
        assert type(tokenClass) == types.ClassType
        self.Token = tokenClass
    def resetTokenClass(self):
        """Restore the default Token class (from Scanner)."""
        self.setTokenClass(Token)
    def disassemble(self, co, classname=None):
        """
        Disassemble a code object, returning a list of 'Token'.

        The main part of this procedure is modelled after
        dis.disassemble().

        Returns (rv, customize): rv is the token list, customize maps
        argument-specialised opcode names (e.g. CALL_FUNCTION_2) to their
        oparg.  Before tokenising, the <=2.6 bytecode is rewritten in
        place to a 2.7-like shape (see getOpcodeToDel/restructCode).
        """
        rv = []
        customize = {}
        Token = self.Token # shortcut
        self.code = array('B', co.co_code)
        n = len(self.code)
        # linestarts: list of (byte offset, line number) pairs
        self.linestarts = list(dis.findlinestarts(co))
        self.prev = [0]
        pop_delet = 0
        i=0
        self.restructRelativeJump()
        # class and names: unmangle private names when inside a class
        if classname:
            classname = '_' + classname.lstrip('_') + '__'
            def unmangle(name):
                if name.startswith(classname) and name[-2:] != '__':
                    return name[len(classname) - 2:]
                return name
            free = [ unmangle(name) for name in (co.co_cellvars + co.co_freevars) ]
            names = [ unmangle(name) for name in co.co_names ]
            varnames = [ unmangle(name) for name in co.co_varnames ]
        else:
            free = co.co_cellvars + co.co_freevars
            names = co.co_names
            varnames = co.co_varnames
        self.names = names
        # collect, in "toDel", the offsets of instructions to remove
        toDel = []
        while i < n-pop_delet:
            op = self.code[i]
            ret = self.getOpcodeToDel(i)
            if ret != None:
                toDel += ret
            if op >= dis.HAVE_ARGUMENT:
                i += 2
            i += 1
        # (was French: "ugly, to revisit/rethink -- do it all in one pass?")
        if toDel:
            toDel = sorted(list(set(toDel)))
            delta = 0
            for x in toDel:
                # pop 3 bytes for an opcode with argument, 1 otherwise,
                # fixing up line starts and jump arguments after each pop
                if self.code[x-delta] >= dis.HAVE_ARGUMENT:
                    self.code.pop(x-delta)
                    self.restructCode(x-delta)
                    self.code.pop(x-delta)
                    self.restructCode(x-delta)
                    self.code.pop(x-delta)
                    self.restructCode(x-delta)
                    delta += 3
                else:
                    self.code.pop(x-delta)
                    self.restructCode(x-delta)
                    delta += 1
        # map each byte offset to the offset of the previous instruction
        n = len(self.code)
        for i in self.op_range(0, n):
            op = self.code[i]
            self.prev.append(i)
            if op >= HAVE_ARGUMENT:
                self.prev.append(i)
                self.prev.append(i)
        # self.lines[j] = (line number of offset j, offset of next line)
        j = 0
        linestarts = self.linestarts
        self.lines = []
        linetuple = namedtuple('linetuple', ['l_no', 'next'])
        linestartoffsets = {a for (a, _) in linestarts}
        (prev_start_byte, prev_line_no) = linestarts[0]
        for (start_byte, line_no) in linestarts[1:]:
            while j < start_byte:
                self.lines.append(linetuple(prev_line_no, start_byte))
                j += 1
            last_op = self.code[self.prev[start_byte]]
            (prev_start_byte, prev_line_no) = (start_byte, line_no)
        while j < n:
            self.lines.append(linetuple(prev_line_no, n))
            j+=1
        # cf maps a target offset to the offsets that jump to it
        cf = self.find_jump_targets(self.code)
        # mark PRINT_ITEM/PRINT_NEWLINE continuations on the same statement
        last_stmt = self.next_stmt[0]
        i = self.next_stmt[last_stmt]
        replace = {}
        while i < n-1:
            if self.lines[last_stmt].next > i:
                if self.code[last_stmt] == PRINT_ITEM:
                    if self.code[i] == PRINT_ITEM:
                        replace[i] = 'PRINT_ITEM_CONT'
                    elif self.code[i] == PRINT_NEWLINE:
                        replace[i] = 'PRINT_NEWLINE_CONT'
            last_stmt = i
            i = self.next_stmt[i]
        # mark IMPORT_NAME continuations on the same source line
        imports = self.all_instr(0, n, (IMPORT_NAME, IMPORT_FROM, IMPORT_STAR))
        if len(imports) > 1:
            last_import = imports[0]
            for i in imports[1:]:
                if self.lines[last_import].next > i:
                    if self.code[last_import] == IMPORT_NAME == self.code[i]:
                        replace[i] = 'IMPORT_NAME_CONT'
                last_import = i
        # main tokenising loop
        extended_arg = 0
        for offset in self.op_range(0, n):
            if offset in cf:
                k = 0
                for j in cf[offset]:
                    rv.append(Token('COME_FROM', None, repr(j),
                                    offset="%s_%d" % (offset, k) ))
                    k += 1
            op = self.code[offset]
            opname = dis.opname[op]
            oparg = None; pattr = None
            if op >= HAVE_ARGUMENT:
                oparg = self.get_argument(offset) + extended_arg
                extended_arg = 0
                if op == dis.EXTENDED_ARG:
                    extended_arg = oparg * 65536L
                    continue
                if op in dis.hasconst:
                    const = co.co_consts[oparg]
                    if type(const) == types.CodeType:
                        oparg = const
                        if const.co_name == '<lambda>':
                            assert opname == 'LOAD_CONST'
                            opname = 'LOAD_LAMBDA'
                        elif const.co_name == '<genexpr>':
                            opname = 'LOAD_GENEXPR'
                        elif const.co_name == '<dictcomp>':
                            opname = 'LOAD_DICTCOMP'
                        elif const.co_name == '<setcomp>':
                            opname = 'LOAD_SETCOMP'
                        # verify uses 'pattr' for comparism, since 'attr'
                        # now holds Code(const) and thus can not be used
                        # for comparism (todo: think about changing this)
                        #pattr = 'code_object @ 0x%x %s->%s' %\
                        #	(id(const), const.co_filename, const.co_name)
                        pattr = '<code_object ' + const.co_name + '>'
                    else:
                        pattr = const
                elif op in dis.hasname:
                    pattr = names[oparg]
                elif op in dis.hasjrel:
                    pattr = repr(offset + 3 + oparg)
                elif op in dis.hasjabs:
                    pattr = repr(oparg)
                elif op in dis.haslocal:
                    pattr = varnames[oparg]
                elif op in dis.hascompare:
                    pattr = dis.cmp_op[oparg]
                elif op in dis.hasfree:
                    pattr = free[oparg]
            if op in (BUILD_LIST, BUILD_TUPLE, BUILD_SLICE,
                            UNPACK_SEQUENCE,
                            MAKE_FUNCTION, CALL_FUNCTION, MAKE_CLOSURE,
                            CALL_FUNCTION_VAR, CALL_FUNCTION_KW,
                            CALL_FUNCTION_VAR_KW, DUP_TOPX,
                            ):
                # CE - Hack for >= 2.5
                #      Now all values loaded via LOAD_CLOSURE are packed into
                #      a tuple before calling MAKE_CLOSURE.
                if op == BUILD_TUPLE and \
                    self.code[offset-3] == LOAD_CLOSURE:
                    continue
                else:
                    # specialise the opcode name with its argument count
                    opname = '%s_%d' % (opname, oparg)
                    if op != BUILD_SLICE:
                        customize[opname] = oparg
            elif op == JA:
                target = self.get_target(offset)
                if target < offset:
                    # backward jump: a continue inside a loop, or the
                    # jump closing the loop body
                    if offset in self.stmts and self.code[offset+3] not in (END_FINALLY, POP_BLOCK) \
                     and offset not in self.not_continue:
                        opname = 'CONTINUE'
                    else:
                        opname = 'JUMP_BACK'
            elif op == LOAD_GLOBAL:
                try:
                    if pattr == 'AssertionError' and rv and rv[-1] == 'JUMP_IF_TRUE':
                        opname = 'LOAD_ASSERT'
                except AttributeError:
                    pass
            elif op == RETURN_VALUE:
                if offset in self.return_end_ifs:
                    opname = 'RETURN_END_IF'
            if offset not in replace:
                rv.append(Token(opname, oparg, pattr, offset, linestart = offset in linestartoffsets))
            else:
                rv.append(Token(replace[offset], oparg, pattr, offset, linestart = offset in linestartoffsets))
        if self.showasm:
            out = self.out # shortcut
            for t in rv:
                print >>out, t
            print >>out
        return rv, customize
def getOpcodeToDel(self, i):
"""
check validity of the opcode at position I and return a list of opcode to delete
"""
opcode = self.code[i]
opsize = self.op_size(opcode)
if opcode == EXTENDED_ARG:
raise 'A faire'
if opcode in (PJIF,PJIT,JA,JF):
if self.code[i+opsize] == POP_TOP:
if self.code[i+opsize] == self.code[i+opsize+1] and self.code[i+opsize] == self.code[i+opsize+2] \
and opcode in (JF,JA) and self.code[i+opsize] != self.code[i+opsize+3]:
pass
else:
return [i+opsize]
if opcode == RAISE_VARARGS:
if self.code[i+opsize] == POP_TOP:
return [i+opsize]
if opcode == BUILD_LIST:
if self.code[i+opsize] == DUP_TOP and self.code[i+opsize+1] in (STORE_NAME,STORE_FAST):
# del DUP/STORE_NAME x
toDel = [i+opsize,i+opsize+1]
nameDel = self.get_argument(i+opsize+1)
start = i+opsize+1
end = start
# del LOAD_NAME x
while end < len(self.code):
end = self.first_instr(end, len(self.code), (LOAD_NAME,LOAD_FAST))
if nameDel == self.get_argument(end):
toDel += [end]
break
if self.code[end] == LOAD_NAME:
end += self.op_size(LOAD_NAME)
else:
end += self.op_size(LOAD_FAST)
# log JA/POP_TOP to del and update PJIF
while start < end:
start = self.first_instr(start, len(self.code), (PJIF))
if start == None: break
target = self.get_target(start)
if self.code[target] == POP_TOP and self.code[target-3] == JA:
toDel += [target, target-3]
# update PJIF
target = self.get_target(target-3)
if target > 0xFFFF:
raise 'A gerer'
self.code[start+1] = target & 0xFF
self.code[start+2] = (target >> 8) & 0xFF
start += self.op_size(PJIF)
# del DELETE_NAME x
while end < len(self.code):
end = self.first_instr(end, len(self.code), (DELETE_NAME,DELETE_FAST))
if nameDel == self.get_argument(end):
toDel += [end]
break
if self.code[end] == DELETE_NAME:
end += self.op_size(DELETE_NAME)
else:
end += self.op_size(DELETE_FAST)
return toDel
return None
def restructRelativeJump(self):
"""
change relative JUMP_IF_FALSE/TRUE to absolut jump
and remap the target of PJIF/PJIT
"""
for i in self.op_range(0, len(self.code)):
if(self.code[i] in (PJIF,PJIT)):
target = self.get_argument(i)
target += i + 3
if target > 0xFFFF:
raise 'A gerer'
self.code[i+1] = target & 0xFF
self.code[i+2] = (target >> 8) & 0xFF
for i in self.op_range(0, len(self.code)):
if(self.code[i] in (PJIF,PJIT)):
target = self.get_target(i)
if self.code[target] == JA:
target = self.get_target(target)
if target > 0xFFFF:
raise 'A gerer'
self.code[i+1] = target & 0xFF
self.code[i+2] = (target >> 8) & 0xFF
def restructCode(self, i):
"""
restruct linestarts and jump destination after removing a POP_TOP
"""
result = list()
for item in self.linestarts:
if i < item[0]:
result.append((item[0]-1, item[1]))
else:
result.append((item[0], item[1]))
self.linestarts = result
for x in self.op_range(0, len(self.code)):
op = self.code[x]
if op >= HAVE_ARGUMENT:
if op in dis.hasjrel:
if x < i and self.get_target(x) > i:
if self.code[x+1]-1 < 0:
self.code[x+2] -= 1
self.code[x+1] = self.code[x+1]+255
else:
self.code[x+1] -= 1
elif op in dis.hasjabs:
if i < self.get_target(x):
if self.code[x+1]-1 < 0:
self.code[x+2] -= 1
self.code[x+1] = self.code[x+1]+255
else:
self.code[x+1] -= 1
def get_target(self, pos, op=None):
if op is None:
op = self.code[pos]
target = self.get_argument(pos)
if op in dis.hasjrel:
target += pos + 3
return target
def get_argument(self, pos):
target = self.code[pos+1] + self.code[pos+2] * 256
return target
def first_instr(self, start, end, instr, target=None, exact=True):
"""
Find the first <instr> in the block from start to end.
<instr> is any python bytecode instruction or a list of opcodes
If <instr> is an opcode with a target (like a jump), a target
destination can be specified which must match precisely if exact
is True, or if exact is False, the instruction which has a target
closest to <target> will be returned.
Return index to it or None if not found.
"""
code = self.code
assert(start>=0 and end<=len(code))
try: None in instr
except: instr = [instr]
pos = None
distance = len(code)
for i in self.op_range(start, end):
op = code[i]
if op in instr:
if target is None:
return i
dest = self.get_target(i, op)
if dest == target:
return i
elif not exact:
_distance = abs(target - dest)
if _distance < distance:
distance = _distance
pos = i
return pos
def last_instr(self, start, end, instr, target=None, exact=True):
"""
Find the last <instr> in the block from start to end.
<instr> is any python bytecode instruction or a list of opcodes
If <instr> is an opcode with a target (like a jump), a target
destination can be specified which must match precisely if exact
is True, or if exact is False, the instruction which has a target
closest to <target> will be returned.
Return index to it or None if not found.
"""
code = self.code
if not (start>=0 and end<=len(code)):
return None
try: None in instr
except: instr = [instr]
pos = None
distance = len(code)
for i in self.op_range(start, end):
op = code[i]
if op in instr:
if target is None:
pos = i
else:
dest = self.get_target(i, op)
if dest == target:
distance = 0
pos = i
elif not exact:
_distance = abs(target - dest)
if _distance <= distance:
distance = _distance
pos = i
return pos
def all_instr(self, start, end, instr, target=None, include_beyond_target=False):
"""
Find all <instr> in the block from start to end.
<instr> is any python bytecode instruction or a list of opcodes
If <instr> is an opcode with a target (like a jump), a target
destination can be specified which must match precisely.
Return a list with indexes to them or [] if none found.
"""
code = self.code
assert(start>=0 and end<=len(code))
try: None in instr
except: instr = [instr]
result = []
for i in self.op_range(start, end):
op = code[i]
if op in instr:
if target is None:
result.append(i)
else:
t = self.get_target(i, op)
if include_beyond_target and t >= target:
result.append(i)
elif t == target:
result.append(i)
return result
def op_size(self, op):
if op < HAVE_ARGUMENT:
return 1
else:
return 3
def op_range(self, start, end):
while start < end:
yield start
start += self.op_size(self.code[start])
    def build_stmt_indices(self):
        """
        Compute statement boundaries over self.code.

        Fills two members:
          self.stmts     -- set of byte offsets that begin a statement
          self.next_stmt -- list mapping every byte offset to the offset of
                            the next statement start (padded with len(code))
        """
        code = self.code
        start = 0;
        end = len(code)
        # Opcodes that by themselves terminate/begin a statement.
        stmt_opcodes = {
            SETUP_LOOP, BREAK_LOOP, CONTINUE_LOOP,
            SETUP_FINALLY, END_FINALLY, SETUP_EXCEPT,
            POP_BLOCK, STORE_FAST, DELETE_FAST, STORE_DEREF,
            STORE_GLOBAL, DELETE_GLOBAL, STORE_NAME, DELETE_NAME,
            STORE_ATTR, DELETE_ATTR, STORE_SUBSCR, DELETE_SUBSCR,
            RETURN_VALUE, RAISE_VARARGS, POP_TOP,
            PRINT_EXPR, PRINT_ITEM, PRINT_NEWLINE, PRINT_ITEM_TO, PRINT_NEWLINE_TO,
            JUMP_ABSOLUTE, EXEC_STMT,
        }
        # Conditional-jump + jump pairs that also mark a statement boundary.
        stmt_opcode_seqs = [(PJIF, JF), (PJIF, JA), (PJIT, JF), (PJIT, JA)]
        # Opcodes that store into a designator (assignment targets).
        designator_ops = {
            STORE_FAST, STORE_NAME, STORE_GLOBAL, STORE_DEREF, STORE_ATTR,
            STORE_SLICE_0, STORE_SLICE_1, STORE_SLICE_2, STORE_SLICE_3,
            STORE_SUBSCR, UNPACK_SEQUENCE, JA
        }
        prelim = self.all_instr(start, end, stmt_opcodes)
        stmts = self.stmts = set(prelim)
        pass_stmts = set()
        # Scan for the two-opcode sequences; on a full match, record the
        # offset of the *first* opcode of the pair (via self.prev).
        for seq in stmt_opcode_seqs:
            for i in self.op_range(start, end-(len(seq)+1)):
                match = True
                for elem in seq:
                    if elem != code[i]:
                        match = False
                        break
                    i += self.op_size(code[i])
                if match:
                    i = self.prev[i]
                    stmts.add(i)
                    pass_stmts.add(i)
        if pass_stmts:
            stmt_list = list(stmts)
            stmt_list.sort()
        else:
            stmt_list = prelim
        last_stmt = -1
        self.next_stmt = []
        slist = self.next_stmt = []
        i = 0
        # Prune offsets that do not really start a statement, then build
        # the next-statement mapping.
        for s in stmt_list:
            if code[s] == JA and s not in pass_stmts:
                target = self.get_target(s)
                # forward jumps and jumps on the same source line are not
                # statement starts
                if target > s or self.lines[last_stmt].l_no == self.lines[s].l_no:
                    stmts.remove(s)
                    continue
                j = self.prev[s]
                while code[j] == JA:
                    j = self.prev[j]
                if code[j] == LIST_APPEND: #list comprehension
                    stmts.remove(s)
                    continue
            elif code[s] == POP_TOP and code[self.prev[s]] == ROT_TWO:
                stmts.remove(s)
                continue
            elif code[s] in designator_ops:
                # a chain of stores fed by FOR_ITER is the loop header,
                # not a separate statement
                j = self.prev[s]
                while code[j] in designator_ops:
                    j = self.prev[j]
                if code[j] == FOR_ITER:
                    stmts.remove(s)
                    continue
            last_stmt = s
            slist += [s] * (s-i)
            i = s
        slist += [len(code)] * (len(code)-len(slist))
def remove_mid_line_ifs(self, ifs):
filtered = []
for i in ifs:
if self.lines[i].l_no == self.lines[i+3].l_no:
if self.code[self.prev[self.lines[i].next]] in (PJIT, PJIF):
continue
filtered.append(i)
return filtered
    def rem_or(self, start, end, instr, target=None, include_beyond_target=False):
        """
        Find all <instr> in the block from start to end.
        <instr> is any python bytecode instruction or a list of opcodes
        If <instr> is an opcode with a target (like a jump), a target
        destination can be specified which must match precisely.

        Return a list with indexes to them or [] if none found.

        After collecting, every PJIT in the range progressively prunes the
        result: only matches at or before the PJIT, or at/after the PJIT's
        target minus one instruction, survive (removes `or`-branch jumps).
        """
        code = self.code
        assert(start>=0 and end<=len(code))
        # accept a single opcode as well as a collection of them
        try:  None in instr
        except:  instr = [instr]
        result = []
        for i in self.op_range(start, end):
            op = code[i]
            if op in instr:
                if target is None:
                    result.append(i)
                else:
                    t = self.get_target(i, op)
                    if include_beyond_target and t >= target:
                        result.append(i)
                    elif t == target:
                        result.append(i)
        # prune matches falling strictly between a PJIT and its target
        pjits = self.all_instr(start, end, PJIT)
        filtered = []
        for pjit in pjits:
            tgt = self.get_target(pjit)-3
            for i in result:
                if i <= pjit or i >= tgt:
                    filtered.append(i)
            result = filtered
            filtered = []
        return result
    def next_except_jump(self, start):
        """
        Return the next jump that was generated by an except SomeException:
        construct in a try...except...else clause or None if not found.
        """
        # Fast path: a conditional jump on the starting line is the
        # exception-match test; its target points just past the handler.
        # NOTE: (PJIF) is a parenthesised int, not a tuple -- first_instr
        # wraps scalars itself, so this still works.
        except_match = self.first_instr(start, self.lines[start].next, (PJIF))
        if except_match:
            jmp = self.prev[self.get_target(except_match)]
            self.ignore_if.add(except_match)
            return jmp

        # Otherwise scan forward for the END_FINALLY that closes this
        # handler, balancing any nested SETUP_* blocks on the way.
        count_END_FINALLY = 0
        count_SETUP_ = 0
        for i in self.op_range(start, len(self.code)):
            op = self.code[i]
            if op == END_FINALLY:
                if count_END_FINALLY == count_SETUP_:
                    # skip a trailing NOP left by the bytecode rewriting
                    if self.code[self.prev[i]] == NOP:
                        i = self.prev[i]
                    assert self.code[self.prev[i]] in (JA, JF, RETURN_VALUE)
                    return self.prev[i]
                count_END_FINALLY += 1
            elif op in (SETUP_EXCEPT, SETUP_FINALLY):
                count_SETUP_ += 1
        #return self.lines[start].next
def restrict_to_parent(self, target, parent):
"""Restrict pos to parent boundaries."""
if not (parent['start'] < target < parent['end']):
target = parent['end']
return target
    def detect_structure(self, pos, op=None):
        """
        Detect type of block structures and their boundaries to fix optimizied jumps
        in python2.3+

        Works by side effect only: appends to self.structs and self.loops,
        and records offsets in self.fixed_jumps, self.ignore_if,
        self.not_continue and self.return_end_ifs.  Returns nothing.
        """
        # TODO: check the struct boundaries more precisely -Dan
        code = self.code
        # Ev remove this test and make op a mandatory argument -Dan
        if op is None:
            op = code[pos]
        ## Detect parent structure: the smallest known struct enclosing pos.
        parent = self.structs[0]
        start = parent['start']
        end = parent['end']
        for s in self.structs:
            _start = s['start']
            _end = s['end']
            if (_start <= pos < _end) and (_start >= start and _end <= end):
                start = _start
                end = _end
                parent = s
        ## We need to know how many new structures were added in this run
        origStructCount = len(self.structs)  # NOTE(review): never read below
        if op == SETUP_LOOP:
            #import pdb; pdb.set_trace()
            start = pos+3
            target = self.get_target(pos, op)
            end = self.restrict_to_parent(target, parent)
            if target != end:
                self.fixed_jumps[pos] = end
            (line_no, next_line_byte) = self.lines[pos]
            # the backward jump closing the loop body
            jump_back = self.last_instr(start, end, JA,
                                        next_line_byte, False)
            if not jump_back: # loop suite ends in return. wtf right?
                jump_back = self.last_instr(start, end, RETURN_VALUE) + 1
                if not jump_back:
                    return
                if code[self.prev[next_line_byte]] not in (PJIF, PJIT):
                    loop_type = 'for'
                else:
                    loop_type = 'while'
                    self.ignore_if.add(self.prev[next_line_byte])
                target = next_line_byte
                end = jump_back + 3
            else:
                if self.get_target(jump_back) >= next_line_byte:
                    jump_back = self.last_instr(start, end, JA,
                                                start, False)
                if end > jump_back+4 and code[end] in (JF, JA):
                    if code[jump_back+4] in (JA, JF):
                        if self.get_target(jump_back+4) == self.get_target(end):
                            self.fixed_jumps[pos] = jump_back+4
                            end = jump_back+4
                elif target < pos:
                    self.fixed_jumps[pos] = jump_back+4
                    end = jump_back+4
                target = self.get_target(jump_back, JA)
                # the loop head tells the kind of loop
                if code[target] in (FOR_ITER, GET_ITER):
                    loop_type = 'for'
                else:
                    loop_type = 'while'
                    test = self.prev[next_line_byte]
                    if test == pos:
                        loop_type = 'while 1'
                    else:
                        self.ignore_if.add(test)
                        test_target = self.get_target(test)
                        if test_target > (jump_back+3):
                            jump_back = test_target
                self.loops.append(target)
            self.structs.append({'type': loop_type + '-loop',
                                 'start': target,
                                 'end': jump_back})
            if jump_back+3 != end:
                self.structs.append({'type': loop_type + '-else',
                                     'start': jump_back+3,
                                     'end': end})
        elif op == SETUP_EXCEPT:
            start = pos+3
            target = self.get_target(pos, op)
            end = self.restrict_to_parent(target, parent)
            if target != end:
                self.fixed_jumps[pos] = end
            #print target, end, parent
            ## Add the try block
            self.structs.append({'type': 'try',
                                 'start': start,
                                 'end': end-4})
            ## Now isolate the except and else blocks
            end_else = start_else = self.get_target(self.prev[end])
            ## Add the except blocks
            i = end
            while self.code[i] != END_FINALLY:
                jmp = self.next_except_jump(i)
                if jmp == None: # check
                    i = self.next_stmt[i]
                    continue
                if self.code[jmp] == RETURN_VALUE:
                    self.structs.append({'type': 'except',
                                         'start': i,
                                         'end': jmp+1})
                    i = jmp + 1
                else:
                    if self.get_target(jmp) != start_else:
                        end_else = self.get_target(jmp)
                    if self.code[jmp] == JF:
                        #self.fixed_jumps[i] = jmp
                        self.fixed_jumps[jmp] = -1
                    self.structs.append({'type': 'except',
                                         'start': i,
                                         'end': jmp})
                    i = jmp + 3
            ## Add the try-else block
            if end_else != start_else:
                r_end_else = self.restrict_to_parent(end_else, parent)
                self.structs.append({'type': 'try-else',
                                     'start': i+2, # check
                                     'end': r_end_else})
                self.fixed_jumps[i] = r_end_else
            else:
                self.fixed_jumps[i] = i+1
        elif op in (PJIF, PJIT):
            start = pos+3
            target = self.get_target(pos, op)
            rtarget = self.restrict_to_parent(target, parent)
            pre = self.prev
            if target != rtarget and parent['type'] == 'and/or':
                self.fixed_jumps[pos] = rtarget
                return
            #does this jump to right after another cond jump?
            # if so, it's part of a larger conditional
            if (code[pre[target]] in (PJIF, PJIT)) and (target > pos):
                self.fixed_jumps[pos] = pre[target]
                self.structs.append({'type': 'and/or',
                                     'start': start,
                                     'end': pre[target]})
                return
            # is this an if and
            if op == PJIF:
                match = self.rem_or(start, self.next_stmt[pos], PJIF, target)
                match = self.remove_mid_line_ifs(match)
                if match:
                    if code[pre[rtarget]] in (JF, JA) \
                            and pre[rtarget] not in self.stmts \
                            and self.restrict_to_parent(self.get_target(pre[rtarget]), parent) == rtarget:
                        if code[pre[pre[rtarget]]] == JA \
                                and self.remove_mid_line_ifs([pos]) \
                                and target == self.get_target(pre[pre[rtarget]]) \
                                and (pre[pre[rtarget]] not in self.stmts or self.get_target(pre[pre[rtarget]]) > pre[pre[rtarget]])\
                                and 1 == len(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], (PJIF, PJIT), target))):
                            pass
                        elif code[pre[pre[rtarget]]] == RETURN_VALUE \
                                and self.remove_mid_line_ifs([pos]) \
                                and 1 == (len(set(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], \
                                                             (PJIF, PJIT), target))) \
                                          | set(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], \
                                                           (PJIF, PJIT, JA), pre[rtarget], True))))):
                            pass
                        else:
                            # find the innermost jump-if sharing our target
                            fix = None
                            jump_ifs = self.all_instr(start, self.next_stmt[pos], PJIF)
                            last_jump_good = True
                            for j in jump_ifs:
                                if target == self.get_target(j):
                                    if self.lines[j].next == j+3 and last_jump_good:
                                        fix = j
                                        break
                                else:
                                    last_jump_good = False
                            self.fixed_jumps[pos] = fix or match[-1]
                            return
                    else:
                        self.fixed_jumps[pos] = match[-1]
                        return
            else: # op == PJIT
                next = self.next_stmt[pos]
                if pre[next] == pos:
                    pass
                elif code[next] in (JF, JA) and target == self.get_target(next):
                    if code[pre[next]] == PJIF:
                        if code[next] == JF or target != rtarget or code[pre[pre[rtarget]]] not in (JA, RETURN_VALUE):
                            self.fixed_jumps[pos] = pre[next]
                            return
                elif code[next] == JA and code[target] in (JA, JF) \
                        and self.get_target(target) == self.get_target(next):
                    self.fixed_jumps[pos] = pre[next]
                    return
            #don't add a struct for a while test, it's already taken care of
            if pos in self.ignore_if:
                return
            if code[pre[rtarget]] == JA and pre[rtarget] in self.stmts \
                    and pre[rtarget] != pos and pre[pre[rtarget]] != pos \
                    and not (code[rtarget] == JA and code[rtarget+3] == POP_BLOCK and code[pre[pre[rtarget]]] != JA):
                rtarget = pre[rtarget]
            #does the if jump just beyond a jump op, then this is probably an if statement
            if code[pre[rtarget]] in (JA, JF):
                if_end = self.get_target(pre[rtarget])
                #is this a loop not an if?
                if (if_end < pre[rtarget]) and (code[pre[if_end]] == SETUP_LOOP):
                    if(if_end > start):
                        return
                end = self.restrict_to_parent(if_end, parent)
                self.structs.append({'type': 'if-then',
                                     'start': start,
                                     'end': pre[rtarget]})
                self.not_continue.add(pre[rtarget])
                if rtarget < end:
                    self.structs.append({'type': 'if-else',
                                         'start': rtarget,
                                         'end': end})
            elif code[pre[rtarget]] == RETURN_VALUE:
                self.structs.append({'type': 'if-then',
                                     'start': start,
                                     'end': rtarget})
                self.return_end_ifs.add(pre[rtarget])
        # if it's an old JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP
        #if target > pos:
        #    unop_target = self.last_instr(pos, target, JF, target)
        #    if unop_target and code[unop_target+3] != ROT_TWO:
        #        self.fixed_jumps[pos] = unop_target
        #    else:
        #        self.fixed_jumps[pos] = self.restrict_to_parent(target, parent)
def find_jump_targets(self, code):
"""
Detect all offsets in a byte code which are jump targets.
Return the list of offsets.
This procedure is modelled after dis.findlables(), but here
for each target the number of jumps are counted.
"""
hasjrel = dis.hasjrel
hasjabs = dis.hasjabs
n = len(code)
self.structs = [{'type': 'root',
'start': 0,
'end': n-1}]
self.loops = [] ## All loop entry points
self.fixed_jumps = {} ## Map fixed jumps to their real destination
self.ignore_if = set()
self.build_stmt_indices()
self.not_continue = set()
self.return_end_ifs = set()
targets = {}
for i in self.op_range(0, n):
op = code[i]
## Determine structures and fix jumps for 2.3+
self.detect_structure(i, op)
if op >= HAVE_ARGUMENT:
label = self.fixed_jumps.get(i)
oparg = self.get_argument(i)
if label is None:
if op in hasjrel and op != FOR_ITER:
label = i + 3 + oparg
#elif op in hasjabs: Pas de gestion des jump abslt
#if op in (PJIF, PJIT): Or pop a faire
#if (oparg > i):
#label = oparg
if label is not None and label != -1:
targets[label] = targets.get(label, []) + [i]
elif op == END_FINALLY and i in self.fixed_jumps:
label = self.fixed_jumps[i]
targets[label] = targets.get(label, []) + [i]
return targets

937
uncompyle2/Scanner26.py Executable file
View File

@@ -0,0 +1,937 @@
# Copyright (c) 1999 John Aycock
# Copyright (c) 2000-2002 by hartmut Goebel <hartmut@goebel.noris.de>
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
#
# See main module for license.
#
__all__ = ['Token', 'Scanner', 'getscanner']
import types
import disas as dis
from collections import namedtuple
from array import array
from operator import itemgetter
from struct import *
from Scanner import Token, Code
class Scanner:
    def __init__(self, version):
        # Scanner for Python 2.6 byte-code: tokenizes a code object into a
        # stream of Tokens for the decompiler grammar.
        self.version = version
        self.resetTokenClass()
        # Point the disassembler module at the opcode table of *version*.
        dis.setVersion(version)
        # Inject every opcode name into this module's globals so the rest
        # of the scanner can use them as bare names (POP_TOP, SETUP_LOOP, ...).
        globals().update({'HAVE_ARGUMENT': dis.HAVE_ARGUMENT})
        globals().update({k.replace('+','_'):v for (k,v) in dis.opmap.items()})
        # Short aliases for the conditional / unconditional jump opcodes
        # (2.6 still has the non-popping JUMP_IF_FALSE/TRUE forms).
        globals().update({'PJIF': dis.opmap['JUMP_IF_FALSE']})
        globals().update({'PJIT': dis.opmap['JUMP_IF_TRUE']})
        globals().update({'JA': dis.opmap['JUMP_ABSOLUTE']})
        globals().update({'JF': dis.opmap['JUMP_FORWARD']})
        self.JUMP_OPs = map(lambda op: dis.opname[op],
                            dis.hasjrel + dis.hasjabs)
def setShowAsm(self, showasm, out=None):
self.showasm = showasm
self.out = out
def setTokenClass(self, tokenClass):
assert type(tokenClass) == types.ClassType
self.Token = tokenClass
    def resetTokenClass(self):
        # Restore the default Token class imported from Scanner.
        self.setTokenClass(Token)
    def disassemble(self, co, classname=None):
        """
        Disassemble a code object, returning a list of 'Token'.

        The main part of this procedure is modelled after
        dis.disassemble().  Returns (tokens, customize) where customize
        maps variable-arity opcode names (e.g. CALL_FUNCTION_2) to their
        argument count.  The 2.6 byte-code is rewritten in place into a
        pseudo-2.7 form (POP_TOPs after jumps removed, list-comprehension
        temporaries removed) before tokenizing.
        """
        rv = []
        customize = {}
        Token = self.Token # shortcut
        self.code = array('B', co.co_code)
        n = len(self.code)
        # linestarts holds the (byte offset, line number) pairs of the code
        self.linestarts = list(dis.findlinestarts(co))
        self.prev = [0]
        pop_delet = 0
        i=0
        # Turn relative JUMP_IF_* into absolute, 2.7-style jumps first.
        self.restructRelativeJump()
        # class and names: undo the private-name mangling done by the compiler
        if classname:
            classname = '_' + classname.lstrip('_') + '__'
            def unmangle(name):
                if name.startswith(classname) and name[-2:] != '__':
                    return name[len(classname) - 2:]
                return name
            free = [ unmangle(name) for name in (co.co_cellvars + co.co_freevars) ]
            names = [ unmangle(name) for name in co.co_names ]
            varnames = [ unmangle(name) for name in co.co_varnames ]
        else:
            free = co.co_cellvars + co.co_freevars
            names = co.co_names
            varnames = co.co_varnames
        self.names = names
        # Collect in "toDel" the offsets of 2.6-only instructions to remove.
        toDel = []
        while i < n-pop_delet:
            op = self.code[i]
            ret = self.getOpcodeToDel(i)
            if ret != None:
                toDel += ret
            if op >= dis.HAVE_ARGUMENT:
                i += 2
            i += 1
        if toDel: # ugly, should be rethought (do everything in one pass? tricky)
            toDel = sorted(list(set(toDel)))
            delta = 0
            # Pop each doomed instruction byte by byte, re-fixing all jump
            # arguments and line starts after every removed byte.
            for x in toDel:
                if self.code[x-delta] >= dis.HAVE_ARGUMENT:
                    self.code.pop(x-delta)
                    self.restructCode(x-delta)
                    self.code.pop(x-delta)
                    self.restructCode(x-delta)
                    self.code.pop(x-delta)
                    self.restructCode(x-delta)
                    delta += 3
                else:
                    self.code.pop(x-delta)
                    self.restructCode(x-delta)
                    delta += 1
        # self.prev maps every byte offset to the offset of the previous
        # instruction (argument bytes map to their instruction too).
        n = len(self.code)
        for i in self.op_range(0, n):
            op = self.code[i]
            self.prev.append(i)
            if op >= HAVE_ARGUMENT:
                self.prev.append(i)
                self.prev.append(i)
        j = 0
        linestarts = self.linestarts
        self.lines = []
        linetuple = namedtuple('linetuple', ['l_no', 'next'])
        linestartoffsets = {a for (a, _) in linestarts}
        (prev_start_byte, prev_line_no) = linestarts[0]
        # For every byte offset record (source line number, offset where the
        # next source line starts).
        for (start_byte, line_no) in linestarts[1:]:
            while j < start_byte:
                self.lines.append(linetuple(prev_line_no, start_byte))
                j += 1
            last_op = self.code[self.prev[start_byte]]
            (prev_start_byte, prev_line_no) = (start_byte, line_no)
        while j < n:
            self.lines.append(linetuple(prev_line_no, n))
            j+=1
        # self.lines now holds (line number, last byte of that line)
        cf = self.find_jump_targets(self.code)
        # cf maps a target offset to the list of offsets jumping to it
        # Mark PRINT_ITEM/PRINT_NEWLINE that continue a print statement.
        last_stmt = self.next_stmt[0]
        i = self.next_stmt[last_stmt]
        replace = {}
        while i < n-1:
            if self.lines[last_stmt].next > i:
                if self.code[last_stmt] == PRINT_ITEM:
                    if self.code[i] == PRINT_ITEM:
                        replace[i] = 'PRINT_ITEM_CONT'
                    elif self.code[i] == PRINT_NEWLINE:
                        replace[i] = 'PRINT_NEWLINE_CONT'
            last_stmt = i
            i = self.next_stmt[i]
        # Mark IMPORT_NAMEs that belong to the same import statement.
        imports = self.all_instr(0, n, (IMPORT_NAME, IMPORT_FROM, IMPORT_STAR))
        if len(imports) > 1:
            last_import = imports[0]
            for i in imports[1:]:
                if self.lines[last_import].next > i:
                    if self.code[last_import] == IMPORT_NAME == self.code[i]:
                        replace[i] = 'IMPORT_NAME_CONT'
                last_import = i
        # Main tokenizing loop.
        extended_arg = 0
        for offset in self.op_range(0, n):
            if offset in cf:
                # Synthesize a COME_FROM pseudo-token for each incoming jump.
                k = 0
                for j in cf[offset]:
                    rv.append(Token('COME_FROM', None, repr(j),
                                    offset="%s_%d" % (offset, k) ))
                    k += 1
            op = self.code[offset]
            opname = dis.opname[op]
            oparg = None; pattr = None
            if op >= HAVE_ARGUMENT:
                oparg = self.get_argument(offset) + extended_arg
                extended_arg = 0
                if op == dis.EXTENDED_ARG:
                    extended_arg = oparg * 65536L
                    continue
                if op in dis.hasconst:
                    const = co.co_consts[oparg]
                    if type(const) == types.CodeType:
                        oparg = const
                        if const.co_name == '<lambda>':
                            assert opname == 'LOAD_CONST'
                            opname = 'LOAD_LAMBDA'
                        elif const.co_name == '<genexpr>':
                            opname = 'LOAD_GENEXPR'
                        elif const.co_name == '<dictcomp>':
                            opname = 'LOAD_DICTCOMP'
                        elif const.co_name == '<setcomp>':
                            opname = 'LOAD_SETCOMP'
                        # verify uses 'pattr' for comparism, since 'attr'
                        # now holds Code(const) and thus can not be used
                        # for comparism (todo: think about changing this)
                        #pattr = 'code_object @ 0x%x %s->%s' %\
                        #	(id(const), const.co_filename, const.co_name)
                        pattr = '<code_object ' + const.co_name + '>'
                    else:
                        pattr = const
                elif op in dis.hasname:
                    pattr = names[oparg]
                elif op in dis.hasjrel:
                    pattr = repr(offset + 3 + oparg)
                elif op in dis.hasjabs:
                    pattr = repr(oparg)
                elif op in dis.haslocal:
                    pattr = varnames[oparg]
                elif op in dis.hascompare:
                    pattr = dis.cmp_op[oparg]
                elif op in dis.hasfree:
                    pattr = free[oparg]
            if op in (BUILD_LIST, BUILD_TUPLE, BUILD_SLICE,
                            UNPACK_SEQUENCE,
                            MAKE_FUNCTION, CALL_FUNCTION, MAKE_CLOSURE,
                            CALL_FUNCTION_VAR, CALL_FUNCTION_KW,
                            CALL_FUNCTION_VAR_KW, DUP_TOPX,
                            ):
                # CE - Hack for >= 2.5
                #      Now all values loaded via LOAD_CLOSURE are packed into
                #      a tuple before calling MAKE_CLOSURE.
                if op == BUILD_TUPLE and \
                    self.code[offset-3] == LOAD_CLOSURE:
                    continue
                else:
                    # encode the arity into the token name, e.g. CALL_FUNCTION_2
                    opname = '%s_%d' % (opname, oparg)
                    if op != BUILD_SLICE:
                        customize[opname] = oparg
            elif op == JA:
                target = self.get_target(offset)
                if target < offset:
                    # backwards jump: either a 'continue' or the loop's back-edge
                    if offset in self.stmts and self.code[offset+3] not in (END_FINALLY, POP_BLOCK) \
                     and offset not in self.not_continue:
                        opname = 'CONTINUE'
                    else:
                        opname = 'JUMP_BACK'
            elif op == LOAD_GLOBAL:
                try:
                    if pattr == 'AssertionError' and rv and rv[-1] == 'JUMP_IF_TRUE':
                        opname = 'LOAD_ASSERT'
                except AttributeError:
                    pass
            elif op == RETURN_VALUE:
                if offset in self.return_end_ifs:
                    opname = 'RETURN_END_IF'
            if offset not in replace:
                rv.append(Token(opname, oparg, pattr, offset, linestart = offset in linestartoffsets))
            else:
                rv.append(Token(replace[offset], oparg, pattr, offset, linestart = offset in linestartoffsets))
        if self.showasm:
            out = self.out # shortcut
            for t in rv:
                print >>out, t
            print >>out
        return rv, customize
def getOpcodeToDel(self, i):
"""
check validity of the opcode at position I and return a list of opcode to delete
"""
opcode = self.code[i]
opsize = self.op_size(opcode)
if opcode == EXTENDED_ARG:
raise 'A faire'
if opcode in (PJIF,PJIT,JA,JF):
if self.code[i+opsize] == POP_TOP:
if self.code[i+opsize] == self.code[i+opsize+1] and self.code[i+opsize] == self.code[i+opsize+2] \
and opcode in (JF,JA) and self.code[i+opsize] != self.code[i+opsize+3]:
pass
else:
return [i+opsize]
if opcode == BUILD_LIST:
if self.code[i+opsize] == DUP_TOP and self.code[i+opsize+1] in (STORE_NAME,STORE_FAST):
# del DUP/STORE_NAME x
toDel = [i+opsize,i+opsize+1]
nameDel = self.get_argument(i+opsize+1)
start = i+opsize+1
end = start
# del LOAD_NAME x
while end < len(self.code):
end = self.first_instr(end, len(self.code), (LOAD_NAME,LOAD_FAST))
if nameDel == self.get_argument(end):
toDel += [end]
break
if self.code[end] == LOAD_NAME:
end += self.op_size(LOAD_NAME)
else:
end += self.op_size(LOAD_FAST)
# log JA/POP_TOP to del and update PJIF
while start < end:
start = self.first_instr(start, len(self.code), (PJIF))
if start == None: break
target = self.get_target(start)
if self.code[target] == POP_TOP and self.code[target-3] == JA:
toDel += [target, target-3]
# update PJIF
target = self.get_target(target-3)
if target > 0xFFFF:
raise 'A gerer'
self.code[start+1] = target & 0xFF
self.code[start+2] = (target >> 8) & 0xFF
start += self.op_size(PJIF)
# del DELETE_NAME x
while end < len(self.code):
end = self.first_instr(end, len(self.code), (DELETE_NAME,DELETE_FAST))
if nameDel == self.get_argument(end):
toDel += [end]
break
if self.code[end] == DELETE_NAME:
end += self.op_size(DELETE_NAME)
else:
end += self.op_size(DELETE_FAST)
return toDel
return None
def restructRelativeJump(self):
"""
change relative JUMP_IF_FALSE/TRUE to absolut jump
"""
for i in self.op_range(0, len(self.code)):
if(self.code[i] in (PJIF,PJIT)):
target = self.get_argument(i)
target += i + 3
if target > 0xFFFF:
raise 'A gerer'
self.code[i+1] = target & 0xFF
self.code[i+2] = (target >> 8) & 0xFF
for i in self.op_range(0, len(self.code)):
if(self.code[i] in (PJIF,PJIT)):
target = self.get_target(i)
if self.code[target] == JA:
target = self.get_target(target)
if target > 0xFFFF:
raise 'A gerer'
self.code[i+1] = target & 0xFF
self.code[i+2] = (target >> 8) & 0xFF
    def restructCode(self, i):
        """
        restruct linestarts and jump destination after removing a POP_TOP

        Called once per byte popped at offset *i*: shifts the recorded
        line-start offsets and decrements every jump argument whose span
        crosses the removed byte.
        """
        # Shift each line start that comes after the removed byte back by one.
        result = list()
        for item in self.linestarts:
            if i < item[0]:
                result.append((item[0]-1, item[1]))
            else:
                result.append((item[0], item[1]))
        self.linestarts = result
        # Patch every jump whose destination lies beyond the removed byte.
        for x in self.op_range(0, len(self.code)):
            op = self.code[x]
            if op >= HAVE_ARGUMENT:
                if op in dis.hasjrel:
                    # relative jump shrinks only when it jumps across i
                    if x < i and self.get_target(x) > i:
                        # decrement the 16-bit little-endian argument,
                        # borrowing from the high byte on underflow
                        if self.code[x+1]-1 < 0:
                            self.code[x+2] -= 1
                            self.code[x+1] = self.code[x+1]+255
                        else:
                            self.code[x+1] -= 1
                elif op in dis.hasjabs:
                    # absolute target moves whenever it lies after i
                    if i < self.get_target(x):
                        if self.code[x+1]-1 < 0:
                            self.code[x+2] -= 1
                            self.code[x+1] = self.code[x+1]+255
                        else:
                            self.code[x+1] -= 1
def get_target(self, pos, op=None):
if op is None:
op = self.code[pos]
target = self.get_argument(pos)
if op in dis.hasjrel:
target += pos + 3
return target
def get_argument(self, pos):
target = self.code[pos+1] + self.code[pos+2] * 256
return target
def first_instr(self, start, end, instr, target=None, exact=True):
"""
Find the first <instr> in the block from start to end.
<instr> is any python bytecode instruction or a list of opcodes
If <instr> is an opcode with a target (like a jump), a target
destination can be specified which must match precisely if exact
is True, or if exact is False, the instruction which has a target
closest to <target> will be returned.
Return index to it or None if not found.
"""
code = self.code
assert(start>=0 and end<=len(code))
try: None in instr
except: instr = [instr]
pos = None
distance = len(code)
for i in self.op_range(start, end):
op = code[i]
if op in instr:
if target is None:
return i
dest = self.get_target(i, op)
if dest == target:
return i
elif not exact:
_distance = abs(target - dest)
if _distance < distance:
distance = _distance
pos = i
return pos
def last_instr(self, start, end, instr, target=None, exact=True):
"""
Find the last <instr> in the block from start to end.
<instr> is any python bytecode instruction or a list of opcodes
If <instr> is an opcode with a target (like a jump), a target
destination can be specified which must match precisely if exact
is True, or if exact is False, the instruction which has a target
closest to <target> will be returned.
Return index to it or None if not found.
"""
code = self.code
if not (start>=0 and end<=len(code)):
return None
try: None in instr
except: instr = [instr]
pos = None
distance = len(code)
for i in self.op_range(start, end):
op = code[i]
if op in instr:
if target is None:
pos = i
else:
dest = self.get_target(i, op)
if dest == target:
distance = 0
pos = i
elif not exact:
_distance = abs(target - dest)
if _distance <= distance:
distance = _distance
pos = i
return pos
def all_instr(self, start, end, instr, target=None, include_beyond_target=False):
"""
Find all <instr> in the block from start to end.
<instr> is any python bytecode instruction or a list of opcodes
If <instr> is an opcode with a target (like a jump), a target
destination can be specified which must match precisely.
Return a list with indexes to them or [] if none found.
"""
code = self.code
assert(start>=0 and end<=len(code))
try: None in instr
except: instr = [instr]
result = []
for i in self.op_range(start, end):
op = code[i]
if op in instr:
if target is None:
result.append(i)
else:
t = self.get_target(i, op)
if include_beyond_target and t >= target:
result.append(i)
elif t == target:
result.append(i)
return result
def op_size(self, op):
if op < HAVE_ARGUMENT:
return 1
else:
return 3
def op_range(self, start, end):
while start < end:
yield start
start += self.op_size(self.code[start])
    def build_stmt_indices(self):
        """
        Compute self.stmts (the set of offsets that start a statement) and
        self.next_stmt (for every byte offset, the offset of the next
        statement start).
        """
        code = self.code
        start = 0;
        end = len(code)
        # opcodes that by themselves terminate / constitute a statement
        stmt_opcodes = {
            SETUP_LOOP, BREAK_LOOP, CONTINUE_LOOP,
            SETUP_FINALLY, END_FINALLY, SETUP_EXCEPT,
            POP_BLOCK, STORE_FAST, DELETE_FAST, STORE_DEREF,
            STORE_GLOBAL, DELETE_GLOBAL, STORE_NAME, DELETE_NAME,
            STORE_ATTR, DELETE_ATTR, STORE_SUBSCR, DELETE_SUBSCR,
            RETURN_VALUE, RAISE_VARARGS, POP_TOP,
            PRINT_EXPR, PRINT_ITEM, PRINT_NEWLINE, PRINT_ITEM_TO, PRINT_NEWLINE_TO,
            JUMP_ABSOLUTE, EXEC_STMT,
        }
        # opcode sequences whose start also marks a statement (bare tests)
        stmt_opcode_seqs = [(PJIF, JF), (PJIF, JA), (PJIT, JF), (PJIT, JA)]
        # opcodes that store into an assignment target
        designator_ops = {
            STORE_FAST, STORE_NAME, STORE_GLOBAL, STORE_DEREF, STORE_ATTR,
            STORE_SLICE_0, STORE_SLICE_1, STORE_SLICE_2, STORE_SLICE_3,
            STORE_SUBSCR, UNPACK_SEQUENCE, JA
        }
        prelim = self.all_instr(start, end, stmt_opcodes)
        stmts = self.stmts = set(prelim)
        pass_stmts = set()
        # add the instruction *before* each matched opcode sequence
        for seq in stmt_opcode_seqs:
            for i in self.op_range(start, end-(len(seq)+1)):
                match = True
                for elem in seq:
                    if elem != code[i]:
                        match = False
                        break
                    i += self.op_size(code[i])
                if match:
                    i = self.prev[i]
                    stmts.add(i)
                    pass_stmts.add(i)
        if pass_stmts:
            stmt_list = list(stmts)
            stmt_list.sort()
        else:
            stmt_list = prelim
        last_stmt = -1
        self.next_stmt = []
        slist = self.next_stmt = []
        i = 0
        # Prune offsets that only look like statements, then fill next_stmt.
        for s in stmt_list:
            if code[s] == JA and s not in pass_stmts:
                # forward or same-line absolute jumps are not statements
                target = self.get_target(s)
                if target > s or self.lines[last_stmt].l_no == self.lines[s].l_no:
                    stmts.remove(s)
                    continue
                j = self.prev[s]
                while code[j] == JA:
                    j = self.prev[j]
                if code[j] == LIST_APPEND: #list comprehension
                    stmts.remove(s)
                    continue
            elif code[s] == POP_TOP and code[self.prev[s]] == ROT_TWO:
                stmts.remove(s)
                continue
            elif code[s] in designator_ops:
                # skip the later stores of an unpacking/chained assignment
                j = self.prev[s]
                while code[j] in designator_ops:
                    j = self.prev[j]
                if code[j] == FOR_ITER:
                    stmts.remove(s)
                    continue
            last_stmt = s
            slist += [s] * (s-i)
            i = s
        slist += [len(code)] * (len(code)-len(slist))
    def remove_mid_line_ifs(self, ifs):
        """
        Filter out of *ifs* the conditional jumps that sit in the middle of
        a source line (those belong to a composite and/or condition, not to
        a statement of their own).
        """
        filtered = []
        for i in ifs:
            # jump and its successor (i+3) on the same source line?
            if self.lines[i].l_no == self.lines[i+3].l_no:
                # ...and the line ends in another conditional jump: mid-line test
                if self.code[self.prev[self.lines[i].next]] in (PJIT, PJIF):
                    continue
            filtered.append(i)
        return filtered
    def rem_or(self, start, end, instr, target=None, include_beyond_target=False):
        """
        Find all <instr> in the block from start to end.
        <instr> is any python bytecode instruction or a list of opcodes
        If <instr> is an opcode with a target (like a jump), a target
        destination can be specified which must match precisely.
        Return a list with indexes to them or [] if none found.

        Unlike all_instr, the result is additionally filtered: matches that
        fall strictly inside the span of a JUMP_IF_TRUE (an 'or'
        sub-expression) are removed.
        """
        code = self.code
        assert(start>=0 and end<=len(code))
        # normalize a bare opcode into a one-element list
        try: None in instr
        except: instr = [instr]
        result = []
        for i in self.op_range(start, end):
            op = code[i]
            if op in instr:
                if target is None:
                    result.append(i)
                else:
                    t = self.get_target(i, op)
                    if include_beyond_target and t >= target:
                        result.append(i)
                    elif t == target:
                        result.append(i)
        # drop matches lying inside any PJIT..(target-3) span
        pjits = self.all_instr(start, end, PJIT)
        filtered = []
        for pjit in pjits:
            tgt = self.get_target(pjit)-3
            for i in result:
                if i <= pjit or i >= tgt:
                    filtered.append(i)
            result = filtered
            filtered = []
        return result
    def next_except_jump(self, start):
        """
        Return the next jump that was generated by an except SomeException:
        construct in a try...except...else clause or None if not found.
        """
        # An `except E:` clause begins with a comparison followed by a
        # conditional jump over the handler body.
        except_match = self.first_instr(start, self.lines[start].next, (PJIF))
        if except_match:
            jmp = self.prev[self.get_target(except_match)]
            self.ignore_if.add(except_match)
            return jmp
        # Bare `except:` — scan forward for the END_FINALLY that closes
        # this handler, skipping over nested SETUP_EXCEPT/SETUP_FINALLY.
        count_END_FINALLY = 0
        count_SETUP_ = 0
        for i in self.op_range(start, len(self.code)):
            op = self.code[i]
            if op == END_FINALLY:
                if count_END_FINALLY == count_SETUP_:
                    if self.code[self.prev[i]] == NOP:
                        i = self.prev[i]
                    assert self.code[self.prev[i]] in (JA, JF, RETURN_VALUE)
                    return self.prev[i]
                count_END_FINALLY += 1
            elif op in (SETUP_EXCEPT, SETUP_FINALLY):
                count_SETUP_ += 1
        #return self.lines[start].next
def restrict_to_parent(self, target, parent):
"""Restrict pos to parent boundaries."""
if not (parent['start'] < target < parent['end']):
target = parent['end']
return target
    def detect_structure(self, pos, op=None):
        """
        Detect type of block structures and their boundaries to fix optimizied jumps
        in python2.3+

        Appends entries to self.structs and records corrected jump
        destinations in self.fixed_jumps for the instruction at *pos*.
        """
        # TODO: check the struct boundaries more precisely -Dan
        code = self.code
        # Ev remove this test and make op a mandatory argument -Dan
        if op is None:
            op = code[pos]
        ## Detect parent structure: the innermost recorded struct containing pos
        parent = self.structs[0]
        start = parent['start']
        end = parent['end']
        for s in self.structs:
            _start = s['start']
            _end = s['end']
            if (_start <= pos < _end) and (_start >= start and _end <= end):
                start = _start
                end = _end
                parent = s
        ## We need to know how many new structures were added in this run
        origStructCount = len(self.structs)
        if op == SETUP_LOOP:
            #import pdb; pdb.set_trace()
            start = pos+3
            target = self.get_target(pos, op)
            end = self.restrict_to_parent(target, parent)
            if target != end:
                self.fixed_jumps[pos] = end
            (line_no, next_line_byte) = self.lines[pos]
            # the loop's back-edge is the last JUMP_ABSOLUTE to the loop head
            jump_back = self.last_instr(start, end, JA,
                                        next_line_byte, False)
            if not jump_back: # loop suite ends in return. wtf right?
                jump_back = self.last_instr(start, end, RETURN_VALUE) + 1
                if not jump_back:
                    return
                if code[self.prev[next_line_byte]] not in (PJIF, PJIT):
                    loop_type = 'for'
                else:
                    loop_type = 'while'
                    self.ignore_if.add(self.prev[next_line_byte])
                target = next_line_byte
                end = jump_back + 3
            else:
                if self.get_target(jump_back) >= next_line_byte:
                    jump_back = self.last_instr(start, end, JA,
                                                start, False)
                # detect a loop-else clause sharing the loop's exit jump
                if end > jump_back+4 and code[end] in (JF, JA):
                    if code[jump_back+4] in (JA, JF):
                        if self.get_target(jump_back+4) == self.get_target(end):
                            self.fixed_jumps[pos] = jump_back+4
                            end = jump_back+4
                elif target < pos:
                    self.fixed_jumps[pos] = jump_back+4
                    end = jump_back+4
                target = self.get_target(jump_back, JA)
                if code[target] in (FOR_ITER, GET_ITER):
                    loop_type = 'for'
                else:
                    loop_type = 'while'
                    test = self.prev[next_line_byte]
                    if test == pos:
                        # no separate test instruction: an infinite loop
                        loop_type = 'while 1'
                    else:
                        self.ignore_if.add(test)
                        test_target = self.get_target(test)
                        if test_target > (jump_back+3):
                            jump_back = test_target
                self.loops.append(target)
            self.structs.append({'type': loop_type + '-loop',
                                 'start': target,
                                 'end': jump_back})
            if jump_back+3 != end:
                # whatever follows the back-edge up to `end` is the else suite
                self.structs.append({'type': loop_type + '-else',
                                     'start': jump_back+3,
                                     'end': end})
        elif op == SETUP_EXCEPT:
            start = pos+3
            target = self.get_target(pos, op)
            end = self.restrict_to_parent(target, parent)
            if target != end:
                self.fixed_jumps[pos] = end
            #print target, end, parent
            ## Add the try block
            self.structs.append({'type': 'try',
                                 'start': start,
                                 'end': end-4})
            ## Now isolate the except and else blocks
            end_else = start_else = self.get_target(self.prev[end])
            ## Add the except blocks
            i = end
            while self.code[i] != END_FINALLY:
                jmp = self.next_except_jump(i)
                if jmp == None: # check
                    i = self.next_stmt[i]
                    continue
                if self.code[jmp] == RETURN_VALUE:
                    self.structs.append({'type': 'except',
                                         'start': i,
                                         'end': jmp+1})
                    i = jmp + 1
                else:
                    if self.get_target(jmp) != start_else:
                        end_else = self.get_target(jmp)
                    if self.code[jmp] == JF:
                        #self.fixed_jumps[i] = jmp
                        self.fixed_jumps[jmp] = -1
                    self.structs.append({'type': 'except',
                                         'start': i,
                                         'end': jmp})
                    i = jmp + 3
            ## Add the try-else block
            if end_else != start_else:
                r_end_else = self.restrict_to_parent(end_else, parent)
                self.structs.append({'type': 'try-else',
                                     'start': i+2, # check
                                     'end': r_end_else})
                self.fixed_jumps[i] = r_end_else
            else:
                self.fixed_jumps[i] = i+1
        elif op in (PJIF, PJIT):
            start = pos+3
            target = self.get_target(pos, op)
            rtarget = self.restrict_to_parent(target, parent)
            pre = self.prev
            if target != rtarget and parent['type'] == 'and/or':
                self.fixed_jumps[pos] = rtarget
                return
            #does this jump to right after another cond jump?
            # if so, it's part of a larger conditional
            if (code[pre[target]] in (PJIF, PJIT)) and (target > pos):
                self.fixed_jumps[pos] = pre[target]
                self.structs.append({'type': 'and/or',
                                     'start': start,
                                     'end': pre[target]})
                return
            # is this an if and
            if op == PJIF:
                # collect the other PJIFs of the same condition on this stmt
                match = self.rem_or(start, self.next_stmt[pos], PJIF, target)
                match = self.remove_mid_line_ifs(match)
                if match:
                    if code[pre[rtarget]] in (JF, JA) \
                            and pre[rtarget] not in self.stmts \
                            and self.restrict_to_parent(self.get_target(pre[rtarget]), parent) == rtarget:
                        if code[pre[pre[rtarget]]] == JA \
                                and self.remove_mid_line_ifs([pos]) \
                                and target == self.get_target(pre[pre[rtarget]]) \
                                and (pre[pre[rtarget]] not in self.stmts or self.get_target(pre[pre[rtarget]]) > pre[pre[rtarget]])\
                                and 1 == len(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], (PJIF, PJIT), target))):
                            pass
                        elif code[pre[pre[rtarget]]] == RETURN_VALUE \
                                and self.remove_mid_line_ifs([pos]) \
                                and 1 == (len(set(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], \
                                                             (PJIF, PJIT), target))) \
                                          | set(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], \
                                                           (PJIF, PJIT, JA), pre[rtarget], True))))):
                            pass
                        else:
                            # pick the last PJIF of the condition that still
                            # looks sane as the real end of the test
                            fix = None
                            jump_ifs = self.all_instr(start, self.next_stmt[pos], PJIF)
                            last_jump_good = True
                            for j in jump_ifs:
                                if target == self.get_target(j):
                                    if self.lines[j].next == j+3 and last_jump_good:
                                        fix = j
                                        break
                                else:
                                    last_jump_good = False
                            self.fixed_jumps[pos] = fix or match[-1]
                            return
                    else:
                        self.fixed_jumps[pos] = match[-1]
                        return
            else: # op == PJIT
                next = self.next_stmt[pos]
                if pre[next] == pos:
                    pass
                elif code[next] in (JF, JA) and target == self.get_target(next):
                    if code[pre[next]] == PJIF:
                        if code[next] == JF or target != rtarget or code[pre[pre[rtarget]]] not in (JA, RETURN_VALUE):
                            self.fixed_jumps[pos] = pre[next]
                            return
                elif code[next] == JA and code[target] in (JA, JF) \
                        and self.get_target(target) == self.get_target(next):
                    self.fixed_jumps[pos] = pre[next]
                    return
            #don't add a struct for a while test, it's already taken care of
            if pos in self.ignore_if:
                return
            if code[pre[rtarget]] == JA and pre[rtarget] in self.stmts \
                    and pre[rtarget] != pos and pre[pre[rtarget]] != pos \
                    and not (code[rtarget] == JA and code[rtarget+3] == POP_BLOCK and code[pre[pre[rtarget]]] != JA):
                rtarget = pre[rtarget]
            #does the if jump just beyond a jump op, then this is probably an if statement
            if code[pre[rtarget]] in (JA, JF):
                if_end = self.get_target(pre[rtarget])
                #is this a loop not an if?
                if (if_end < pre[rtarget]) and (code[pre[if_end]] == SETUP_LOOP):
                    if(if_end > start):
                        return
                end = self.restrict_to_parent(if_end, parent)
                self.structs.append({'type': 'if-then',
                                     'start': start,
                                     'end': pre[rtarget]})
                self.not_continue.add(pre[rtarget])
                if rtarget < end:
                    self.structs.append({'type': 'if-else',
                                         'start': rtarget,
                                         'end': end})
            elif code[pre[rtarget]] == RETURN_VALUE:
                self.structs.append({'type': 'if-then',
                                     'start': start,
                                     'end': rtarget})
                self.return_end_ifs.add(pre[rtarget])
    def find_jump_targets(self, code):
        """
        Detect all offsets in a byte code which are jump targets.
        Return the list of offsets.
        This procedure is modelled after dis.findlables(), but here
        for each target the number of jumps are counted.

        Returns a dict {target offset: [offsets that jump there]} and, as
        a side effect, rebuilds all structure bookkeeping via
        detect_structure().
        """
        hasjrel = dis.hasjrel
        hasjabs = dis.hasjabs
        n = len(code)
        self.structs = [{'type': 'root',
                         'start': 0,
                         'end': n-1}]
        self.loops = [] ## All loop entry points
        self.fixed_jumps = {} ## Map fixed jumps to their real destination
        self.ignore_if = set()
        self.build_stmt_indices()
        self.not_continue = set()
        self.return_end_ifs = set()
        targets = {}
        for i in self.op_range(0, n):
            op = code[i]
            ## Determine structures and fix jumps for 2.3+
            self.detect_structure(i, op)
            if op >= HAVE_ARGUMENT:
                label = self.fixed_jumps.get(i)
                oparg = self.get_argument(i)
                if label is None:
                    if op in hasjrel and op != FOR_ITER:
                        label = i + 3 + oparg
                    #elif op in hasjabs: absolute jumps are not recorded here
                    #if op in (PJIF, PJIT): to do: handle the Or/pop case
                    #if (oparg > i):
                    #label = oparg
                # -1 marks a jump deliberately ignored by detect_structure
                if label is not None and label != -1:
                    targets[label] = targets.get(label, []) + [i]
            elif op == END_FINALLY and i in self.fixed_jumps:
                label = self.fixed_jumps[i]
                targets[label] = targets.get(label, []) + [i]
        return targets

796
uncompyle2/Scanner27.py Executable file
View File

@@ -0,0 +1,796 @@
# Copyright (c) 1999 John Aycock
# Copyright (c) 2000-2002 by hartmut Goebel <hartmut@goebel.noris.de>
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
#
# See main module for license.
#
__all__ = ['Token', 'Scanner', 'getscanner']
import types
import disas as dis
from collections import namedtuple
from array import array
from operator import itemgetter
from Scanner import Token, Code
class Scanner:
    def __init__(self, version):
        # Scanner for native Python 2.7 byte-code.
        self.version = version
        self.resetTokenClass()
        # Point the disassembler module at the opcode table of *version*.
        dis.setVersion(version)
        # Inject every opcode name into this module's globals so the rest
        # of the scanner can use them as bare names (POP_TOP, SETUP_LOOP, ...).
        globals().update({'HAVE_ARGUMENT': dis.HAVE_ARGUMENT})
        globals().update({k.replace('+','_'):v for (k,v) in dis.opmap.items()})
        # Short aliases for the conditional / unconditional jump opcodes.
        globals().update({'PJIF': dis.opmap['POP_JUMP_IF_FALSE']})
        globals().update({'PJIT': dis.opmap['POP_JUMP_IF_TRUE']})
        globals().update({'JA': dis.opmap['JUMP_ABSOLUTE']})
        globals().update({'JF': dis.opmap['JUMP_FORWARD']})
        self.JUMP_OPs = map(lambda op: dis.opname[op],
                            dis.hasjrel + dis.hasjabs)
def setShowAsm(self, showasm, out=None):
self.showasm = showasm
self.out = out
    def setTokenClass(self, tokenClass):
        # Emit tokens as instances of *tokenClass* (must be an old-style class).
        assert type(tokenClass) == types.ClassType
        self.Token = tokenClass
    def resetTokenClass(self):
        # Restore the default Token class imported from Scanner.
        self.setTokenClass(Token)
    def disassemble(self, co, classname=None):
        """
        Disassemble a code object, returning a list of 'Token'.

        The main part of this procedure is modelled after
        dis.disassemble().  Returns (tokens, customize) where customize
        maps variable-arity opcode names (e.g. CALL_FUNCTION_2) to their
        argument count.
        """
        rv = []
        customize = {}
        Token = self.Token # shortcut
        self.code = code = array('B', co.co_code)
        n = len(code)
        self.prev = [0]
        # self.prev maps every byte offset (including argument bytes) to
        # the offset of the previous instruction
        for i in self.op_range(0, n):
            op = code[i]
            self.prev.append(i)
            if op >= HAVE_ARGUMENT:
                self.prev.append(i)
                self.prev.append(i)
        self.lines = []
        linetuple = namedtuple('linetuple', ['l_no', 'next'])
        j = 0
        # linestarts holds the (byte offset, line number) pairs of the code
        linestarts = list(dis.findlinestarts(co))
        linestartoffsets = {a for (a, _) in linestarts}
        (prev_start_byte, prev_line_no) = linestarts[0]
        # for every byte offset record (source line number, offset where
        # the next source line starts)
        for (start_byte, line_no) in linestarts[1:]:
            while j < start_byte:
                self.lines.append(linetuple(prev_line_no, start_byte))
                j += 1
            last_op = code[self.prev[start_byte]]
            (prev_start_byte, prev_line_no) = (start_byte, line_no)
        while j < n:
            self.lines.append(linetuple(prev_line_no, n))
            j+=1
        # self.lines contains (line number, last byte of that line)
        cf = self.find_jump_targets(code)
        # cf maps a target offset to the list of offsets jumping to it
        # undo the private-name mangling done by the compiler
        if classname:
            classname = '_' + classname.lstrip('_') + '__'
            def unmangle(name):
                if name.startswith(classname) and name[-2:] != '__':
                    return name[len(classname) - 2:]
                return name
            free = [ unmangle(name) for name in (co.co_cellvars + co.co_freevars) ]
            names = [ unmangle(name) for name in co.co_names ]
            varnames = [ unmangle(name) for name in co.co_varnames ]
        else:
            free = co.co_cellvars + co.co_freevars
            names = co.co_names
            varnames = co.co_varnames
        # Mark PRINT_ITEM/PRINT_NEWLINE that continue a print statement.
        last_stmt = self.next_stmt[0]
        i = self.next_stmt[last_stmt]
        replace = {}
        while i < n-1:
            if self.lines[last_stmt].next > i:
                if code[last_stmt] == PRINT_ITEM:
                    if code[i] == PRINT_ITEM:
                        replace[i] = 'PRINT_ITEM_CONT'
                    elif code[i] == PRINT_NEWLINE:
                        replace[i] = 'PRINT_NEWLINE_CONT'
            last_stmt = i
            i = self.next_stmt[i]
        # Mark IMPORT_NAMEs that belong to the same import statement.
        imports = self.all_instr(0, n, (IMPORT_NAME, IMPORT_FROM, IMPORT_STAR))
        if len(imports) > 1:
            last_import = imports[0]
            for i in imports[1:]:
                if self.lines[last_import].next > i:
                    if code[last_import] == IMPORT_NAME == code[i]:
                        replace[i] = 'IMPORT_NAME_CONT'
                last_import = i
        # Main tokenizing loop.
        extended_arg = 0
        for offset in self.op_range(0, n):
            if offset in cf:
                # synthesize a COME_FROM pseudo-token for each incoming jump
                k = 0
                for j in cf[offset]:
                    rv.append(Token('COME_FROM', None, repr(j),
                                    offset="%s_%d" % (offset, k) ))
                    k += 1
            op = code[offset]
            opname = dis.opname[op]
            oparg = None; pattr = None
            if op >= HAVE_ARGUMENT:
                oparg = code[offset+1] + code[offset+2] * 256 + extended_arg
                extended_arg = 0
                if op == dis.EXTENDED_ARG:
                    extended_arg = oparg * 65536L
                    continue
                if op in dis.hasconst:
                    const = co.co_consts[oparg]
                    if type(const) == types.CodeType:
                        oparg = const
                        if const.co_name == '<lambda>':
                            assert opname == 'LOAD_CONST'
                            opname = 'LOAD_LAMBDA'
                        elif const.co_name == '<genexpr>':
                            opname = 'LOAD_GENEXPR'
                        elif const.co_name == '<dictcomp>':
                            opname = 'LOAD_DICTCOMP'
                        elif const.co_name == '<setcomp>':
                            opname = 'LOAD_SETCOMP'
                        # verify uses 'pattr' for comparism, since 'attr'
                        # now holds Code(const) and thus can not be used
                        # for comparism (todo: think about changing this)
                        #pattr = 'code_object @ 0x%x %s->%s' %\
                        #	(id(const), const.co_filename, const.co_name)
                        pattr = '<code_object ' + const.co_name + '>'
                    else:
                        pattr = const
                elif op in dis.hasname:
                    pattr = names[oparg]
                elif op in dis.hasjrel:
                    pattr = repr(offset + 3 + oparg)
                elif op in dis.hasjabs:
                    pattr = repr(oparg)
                elif op in dis.haslocal:
                    pattr = varnames[oparg]
                elif op in dis.hascompare:
                    pattr = dis.cmp_op[oparg]
                elif op in dis.hasfree:
                    pattr = free[oparg]
            if op in (BUILD_LIST, BUILD_TUPLE, BUILD_SET, BUILD_SLICE,
                            UNPACK_SEQUENCE,
                            MAKE_FUNCTION, CALL_FUNCTION, MAKE_CLOSURE,
                            CALL_FUNCTION_VAR, CALL_FUNCTION_KW,
                            CALL_FUNCTION_VAR_KW, DUP_TOPX,
                            ):
                # CE - Hack for >= 2.5
                #      Now all values loaded via LOAD_CLOSURE are packed into
                #      a tuple before calling MAKE_CLOSURE.
                if op == BUILD_TUPLE and \
                    code[offset-3] == LOAD_CLOSURE:
                    continue
                else:
                    # encode the arity into the token name, e.g. CALL_FUNCTION_2
                    opname = '%s_%d' % (opname, oparg)
                    if op != BUILD_SLICE:
                        customize[opname] = oparg
            elif op == JA:
                target = self.get_target(offset)
                if target < offset:
                    # backwards jump: either a 'continue' or the loop's back-edge
                    if offset in self.stmts and code[offset+3] not in (END_FINALLY, POP_BLOCK) \
                     and offset not in self.not_continue:
                        opname = 'CONTINUE'
                    else:
                        opname = 'JUMP_BACK'
            elif op == LOAD_GLOBAL:
                try:
                    if pattr == 'AssertionError' and rv and rv[-1] == 'POP_JUMP_IF_TRUE':
                        opname = 'LOAD_ASSERT'
                except AttributeError:
                    pass
            elif op == RETURN_VALUE:
                if offset in self.return_end_ifs:
                    opname = 'RETURN_END_IF'
            if offset not in replace:
                rv.append(Token(opname, oparg, pattr, offset, linestart = offset in linestartoffsets))
            else:
                rv.append(Token(replace[offset], oparg, pattr, offset, linestart = offset in linestartoffsets))
        if self.showasm:
            out = self.out # shortcut
            for t in rv:
                print >>out, t
            print >>out
        return rv, customize
def get_target(self, pos, op=None):
if op is None:
op = self.code[pos]
target = self.code[pos+1] + self.code[pos+2] * 256
if op in dis.hasjrel:
target += pos + 3
return target
def first_instr(self, start, end, instr, target=None, exact=True):
"""
Find the first <instr> in the block from start to end.
<instr> is any python bytecode instruction or a list of opcodes
If <instr> is an opcode with a target (like a jump), a target
destination can be specified which must match precisely if exact
is True, or if exact is False, the instruction which has a target
closest to <target> will be returned.
Return index to it or None if not found.
"""
code = self.code
assert(start>=0 and end<=len(code))
try: None in instr
except: instr = [instr]
pos = None
distance = len(code)
for i in self.op_range(start, end):
op = code[i]
if op in instr:
if target is None:
return i
dest = self.get_target(i, op)
if dest == target:
return i
elif not exact:
_distance = abs(target - dest)
if _distance < distance:
distance = _distance
pos = i
return pos
def last_instr(self, start, end, instr, target=None, exact=True):
"""
Find the last <instr> in the block from start to end.
<instr> is any python bytecode instruction or a list of opcodes
If <instr> is an opcode with a target (like a jump), a target
destination can be specified which must match precisely if exact
is True, or if exact is False, the instruction which has a target
closest to <target> will be returned.
Return index to it or None if not found.
"""
code = self.code
if not (start>=0 and end<=len(code)):
return None
try: None in instr
except: instr = [instr]
pos = None
distance = len(code)
for i in self.op_range(start, end):
op = code[i]
if op in instr:
if target is None:
pos = i
else:
dest = self.get_target(i, op)
if dest == target:
distance = 0
pos = i
elif not exact:
_distance = abs(target - dest)
if _distance <= distance:
distance = _distance
pos = i
return pos
def all_instr(self, start, end, instr, target=None, include_beyond_target=False):
"""
Find all <instr> in the block from start to end.
<instr> is any python bytecode instruction or a list of opcodes
If <instr> is an opcode with a target (like a jump), a target
destination can be specified which must match precisely.
Return a list with indexes to them or [] if none found.
"""
code = self.code
assert(start>=0 and end<=len(code))
try: None in instr
except: instr = [instr]
result = []
for i in self.op_range(start, end):
op = code[i]
if op in instr:
if target is None:
result.append(i)
else:
t = self.get_target(i, op)
if include_beyond_target and t >= target:
result.append(i)
elif t == target:
result.append(i)
return result
def op_size(self, op):
if op < HAVE_ARGUMENT:
return 1
else:
return 3
def op_range(self, start, end):
while start < end:
yield start
start += self.op_size(self.code[start])
    def build_stmt_indices(self):
        """Compute the instruction offsets that begin a statement.

        Populates self.stmts (a set of statement-start offsets) and
        self.next_stmt, a list mapping every code offset to the offset
        of the next statement start (len(code) past the last one).
        """
        code = self.code
        start = 0;
        end = len(code)
        # Opcodes that by themselves mark a statement boundary.
        stmt_opcodes = {
            SETUP_LOOP, BREAK_LOOP, CONTINUE_LOOP,
            SETUP_FINALLY, END_FINALLY, SETUP_EXCEPT, SETUP_WITH,
            POP_BLOCK, STORE_FAST, DELETE_FAST, STORE_DEREF,
            STORE_GLOBAL, DELETE_GLOBAL, STORE_NAME, DELETE_NAME,
            STORE_ATTR, DELETE_ATTR, STORE_SUBSCR, DELETE_SUBSCR,
            RETURN_VALUE, RAISE_VARARGS, POP_TOP,
            PRINT_EXPR, PRINT_ITEM, PRINT_NEWLINE, PRINT_ITEM_TO, PRINT_NEWLINE_TO,
            STORE_SLICE_0, STORE_SLICE_1, STORE_SLICE_2, STORE_SLICE_3,
            DELETE_SLICE_0, DELETE_SLICE_1, DELETE_SLICE_2, DELETE_SLICE_3,
            JUMP_ABSOLUTE, EXEC_STMT,
        }
        # Conditional-jump/unconditional-jump pairs that also end a
        # statement (e.g. the code emitted for 'pass'-like constructs).
        stmt_opcode_seqs = [(PJIF, JF), (PJIF, JA), (PJIT, JF), (PJIT, JA)]
        # Opcodes that store into an assignment target.
        designator_ops = {
            STORE_FAST, STORE_NAME, STORE_GLOBAL, STORE_DEREF, STORE_ATTR,
            STORE_SLICE_0, STORE_SLICE_1, STORE_SLICE_2, STORE_SLICE_3,
            STORE_SUBSCR, UNPACK_SEQUENCE, JA
        }
        prelim = self.all_instr(start, end, stmt_opcodes)
        stmts = self.stmts = set(prelim)
        pass_stmts = set()
        # Scan for the two-opcode sequences above.
        for seq in stmt_opcode_seqs:
            for i in self.op_range(start, end-(len(seq)+1)):
                match = True
                for elem in seq:
                    if elem != code[i]:
                        match = False
                        break
                    i += self.op_size(code[i])
                if match:
                    # record the offset of the last opcode of the sequence
                    i = self.prev[i]
                    stmts.add(i)
                    pass_stmts.add(i)
        if pass_stmts:
            stmt_list = list(stmts)
            stmt_list.sort()
        else:
            stmt_list = prelim
        last_stmt = -1
        self.next_stmt = []
        slist = self.next_stmt = []
        i = 0
        # Prune offsets that only look like statement starts, filling
        # slist so slist[off] is the next statement start after off.
        for s in stmt_list:
            if code[s] == JA and s not in pass_stmts:
                # An unconditional jump is only a statement (continue /
                # loop back-edge) when it jumps backwards onto a new
                # source line.
                target = self.get_target(s)
                if target > s or self.lines[last_stmt].l_no == self.lines[s].l_no:
                    stmts.remove(s)
                    continue
                j = self.prev[s]
                while code[j] == JA:
                    j = self.prev[j]
                if code[j] == LIST_APPEND: #list comprehension
                    stmts.remove(s)
                    continue
            elif code[s] == POP_TOP and code[self.prev[s]] == ROT_TWO:
                stmts.remove(s)
                continue
            elif code[s] in designator_ops:
                # A store preceded only by other stores that originate in
                # a FOR_ITER belongs to the loop header, not a statement.
                j = self.prev[s]
                while code[j] in designator_ops:
                    j = self.prev[j]
                if code[j] == FOR_ITER:
                    stmts.remove(s)
                    continue
            last_stmt = s
            slist += [s] * (s-i)
            i = s
        slist += [len(code)] * (len(code)-len(slist))
def remove_mid_line_ifs(self, ifs):
filtered = []
for i in ifs:
if self.lines[i].l_no == self.lines[i+3].l_no:
if self.code[self.prev[self.lines[i].next]] in (PJIT, PJIF):
continue
filtered.append(i)
return filtered
def rem_or(self, start, end, instr, target=None, include_beyond_target=False):
"""
Find all <instr> in the block from start to end.
<instr> is any python bytecode instruction or a list of opcodes
If <instr> is an opcode with a target (like a jump), a target
destination can be specified which must match precisely.
Return a list with indexes to them or [] if none found.
"""
code = self.code
assert(start>=0 and end<=len(code))
try: None in instr
except: instr = [instr]
result = []
for i in self.op_range(start, end):
op = code[i]
if op in instr:
if target is None:
result.append(i)
else:
t = self.get_target(i, op)
if include_beyond_target and t >= target:
result.append(i)
elif t == target:
result.append(i)
pjits = self.all_instr(start, end, PJIT)
filtered = []
for pjit in pjits:
tgt = self.get_target(pjit)-3
for i in result:
if i <= pjit or i >= tgt:
filtered.append(i)
result = filtered
filtered = []
return result
    def next_except_jump(self, start):
        """
        Return the next jump that was generated by an except SomeException:
        construct in a try...except...else clause or None if not found.
        """
        # Fast path: an 'except E:' clause compiles to a comparison
        # followed by POP_JUMP_IF_FALSE on the same source line.
        except_match = self.first_instr(start, self.lines[start].next, POP_JUMP_IF_FALSE)
        if except_match:
            jmp = self.prev[self.get_target(except_match)]
            self.ignore_if.add(except_match)
            return jmp
        # Otherwise (bare 'except:') scan forward for the END_FINALLY
        # that closes this handler, skipping over nested SETUP_* blocks.
        count_END_FINALLY = 0
        count_SETUP_ = 0
        for i in self.op_range(start, len(self.code)):
            op = self.code[i]
            if op == END_FINALLY:
                if count_END_FINALLY == count_SETUP_:
                    assert self.code[self.prev[i]] in (JA, JF, RETURN_VALUE)
                    return self.prev[i]
                count_END_FINALLY += 1
            elif op in (SETUP_EXCEPT, SETUP_WITH, SETUP_FINALLY):
                count_SETUP_ += 1
def restrict_to_parent(self, target, parent):
"""Restrict pos to parent boundaries."""
if not (parent['start'] < target < parent['end']):
target = parent['end']
return target
    def detect_structure(self, pos, op=None):
        """
        Detect type of block structures and their boundaries to fix optimizied jumps
        in python2.3+

        Side effects: appends entries to self.structs and records
        rewritten jump destinations in self.fixed_jumps.
        """
        # TODO: check the struct boundaries more precisely -Dan
        code = self.code
        # Ev remove this test and make op a mandatory argument -Dan
        if op is None:
            op = code[pos]
        ## Detect parent structure: the innermost already-known structure
        ## that contains pos.
        parent = self.structs[0]
        start = parent['start']
        end = parent['end']
        for s in self.structs:
            _start = s['start']
            _end = s['end']
            if (_start <= pos < _end) and (_start >= start and _end <= end):
                start = _start
                end = _end
                parent = s
        ## We need to know how many new structures were added in this run
        origStructCount = len(self.structs)
        if op == SETUP_LOOP:
            #import pdb; pdb.set_trace()
            start = pos+3
            target = self.get_target(pos, op)
            end = self.restrict_to_parent(target, parent)
            if target != end:
                self.fixed_jumps[pos] = end
            (line_no, next_line_byte) = self.lines[pos]
            # The loop back-edge is the last JUMP_ABSOLUTE targeting the
            # start of the loop's first full line.
            jump_back = self.last_instr(start, end, JA,
                                        next_line_byte, False)
            if not jump_back: # loop suite ends in return. wtf right?
                jump_back = self.last_instr(start, end, RETURN_VALUE) + 1
                if not jump_back:
                    return
                if code[self.prev[next_line_byte]] not in (PJIF, PJIT):
                    loop_type = 'for'
                else:
                    loop_type = 'while'
                    self.ignore_if.add(self.prev[next_line_byte])
                target = next_line_byte
                end = jump_back + 3
            else:
                if self.get_target(jump_back) >= next_line_byte:
                    jump_back = self.last_instr(start, end, JA,
                                          start, False)
                # Detect an optimized-away jump at the end of the loop body.
                if end > jump_back+4 and code[end] in (JF, JA):
                    if code[jump_back+4] in (JA, JF):
                        if self.get_target(jump_back+4) == self.get_target(end):
                            self.fixed_jumps[pos] = jump_back+4
                            end = jump_back+4
                elif target < pos:
                    self.fixed_jumps[pos] = jump_back+4
                    end = jump_back+4
                target = self.get_target(jump_back, JA)
                # A back-edge landing on FOR_ITER/GET_ITER means a for loop;
                # otherwise it is a while (or 'while 1' when there is no test).
                if code[target] in (FOR_ITER, GET_ITER):
                    loop_type = 'for'
                else:
                    loop_type = 'while'
                    test = self.prev[next_line_byte]
                    if test == pos:
                        loop_type = 'while 1'
                    else:
                        self.ignore_if.add(test)
                        test_target = self.get_target(test)
                        if test_target > (jump_back+3):
                            jump_back = test_target
                self.loops.append(target)
            self.structs.append({'type': loop_type + '-loop',
                                   'start': target,
                                   'end':   jump_back})
            if jump_back+3 != end:
                # remaining code up to 'end' is the loop's else-suite
                self.structs.append({'type': loop_type + '-else',
                                       'start': jump_back+3,
                                       'end':   end})
        elif op == SETUP_EXCEPT:
            start = pos+3
            target = self.get_target(pos, op)
            end = self.restrict_to_parent(target, parent)
            if target != end:
                self.fixed_jumps[pos] = end
                #print target, end, parent
            ## Add the try block
            self.structs.append({'type':  'try',
                                   'start': start,
                                   'end':   end-4})
            ## Now isolate the except and else blocks
            end_else = start_else = self.get_target(self.prev[end])
            ## Add the except blocks
            i = end
            while self.code[i] != END_FINALLY:
                jmp = self.next_except_jump(i)
                if self.code[jmp] == RETURN_VALUE:
                    # handler ends in a return; there is no jump past it
                    self.structs.append({'type':  'except',
                                           'start': i,
                                           'end':   jmp+1})
                    i = jmp + 1
                else:
                    if self.get_target(jmp) != start_else:
                        end_else = self.get_target(jmp)
                    if self.code[jmp] == JF:
                        self.fixed_jumps[jmp] = -1
                    self.structs.append({'type':  'except',
                                   'start': i,
                                   'end':   jmp})
                    i = jmp + 3
            ## Add the try-else block
            if end_else != start_else:
                r_end_else = self.restrict_to_parent(end_else, parent)
                self.structs.append({'type':  'try-else',
                                       'start': i+1,
                                       'end':   r_end_else})
                self.fixed_jumps[i] = r_end_else
            else:
                self.fixed_jumps[i] = i+1
        elif op in (PJIF, PJIT):
            start = pos+3
            target = self.get_target(pos, op)
            rtarget = self.restrict_to_parent(target, parent)
            pre = self.prev
            if target != rtarget and parent['type'] == 'and/or':
                self.fixed_jumps[pos] = rtarget
                return
            #does this jump to right after another cond jump?
            # if so, it's part of a larger conditional
            if (code[pre[target]] in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP,
                                      PJIF, PJIT)) and (target > pos):
                self.fixed_jumps[pos] = pre[target]
                self.structs.append({'type':  'and/or',
                                       'start': start,
                                       'end':   pre[target]})
                return
            # is this an if and
            if op == PJIF:
                match = self.rem_or(start, self.next_stmt[pos], PJIF, target)
                match = self.remove_mid_line_ifs(match)
                if match:
                    if code[pre[rtarget]] in (JF, JA) \
                            and pre[rtarget] not in self.stmts \
                            and self.restrict_to_parent(self.get_target(pre[rtarget]), parent) == rtarget:
                        # The two 'pass' branches below recognize patterns
                        # that are already a plain if/else or if/return and
                        # need no jump rewriting.
                        if code[pre[pre[rtarget]]] == JA \
                                and self.remove_mid_line_ifs([pos]) \
                                and target == self.get_target(pre[pre[rtarget]]) \
                                and (pre[pre[rtarget]] not in self.stmts or self.get_target(pre[pre[rtarget]]) > pre[pre[rtarget]])\
                                and 1 == len(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], (PJIF, PJIT), target))):
                            pass
                        elif code[pre[pre[rtarget]]] == RETURN_VALUE \
                                and self.remove_mid_line_ifs([pos]) \
                                and 1 == (len(set(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], \
                                                             (PJIF, PJIT), target))) \
                                              | set(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], \
                                                           (PJIF, PJIT, JA), pre[rtarget], True))))):
                            pass
                        else:
                            # part of an 'and' chain: fix this jump to point
                            # at the last jump of the chain
                            fix = None
                            jump_ifs = self.all_instr(start, self.next_stmt[pos], PJIF)
                            last_jump_good = True
                            for j in jump_ifs:
                                if target == self.get_target(j):
                                    if self.lines[j].next == j+3 and last_jump_good:
                                        fix = j
                                        break
                                else:
                                    last_jump_good = False
                            self.fixed_jumps[pos] = fix or match[-1]
                            return
                    else:
                        self.fixed_jumps[pos] = match[-1]
                        return
            else: # op == PJIT
                next = self.next_stmt[pos]
                if pre[next] == pos:
                    pass
                elif code[next] in (JF, JA) and target == self.get_target(next):
                    # part of an 'or' chain
                    if code[pre[next]] == PJIF:
                        if code[next] == JF or target != rtarget or code[pre[pre[rtarget]]] not in (JA, RETURN_VALUE):
                            self.fixed_jumps[pos] = pre[next]
                            return
                elif code[next] == JA and code[target] in (JA, JF) \
                      and self.get_target(target) == self.get_target(next):
                    self.fixed_jumps[pos] = pre[next]
                    return
            #don't add a struct for a while test, it's already taken care of
            if pos in self.ignore_if:
                return
            if code[pre[rtarget]] == JA and pre[rtarget] in self.stmts \
                    and pre[rtarget] != pos and pre[pre[rtarget]] != pos \
                    and not (code[rtarget] == JA and code[rtarget+3] == POP_BLOCK and code[pre[pre[rtarget]]] != JA):
                rtarget = pre[rtarget]
            #does the if jump just beyond a jump op, then this is probably an if statement
            if code[pre[rtarget]] in (JA, JF):
                if_end = self.get_target(pre[rtarget])
                #is this a loop not an if?
                if (if_end < pre[rtarget]) and (code[pre[if_end]] == SETUP_LOOP):
                    if(if_end > start):
                        return
                end = self.restrict_to_parent(if_end, parent)
                self.structs.append({'type':  'if-then',
                                       'start': start,
                                       'end':   pre[rtarget]})
                self.not_continue.add(pre[rtarget])
                if rtarget < end:
                    self.structs.append({'type':  'if-else',
                                           'start': rtarget,
                                           'end':   end})
            elif code[pre[rtarget]] == RETURN_VALUE:
                self.structs.append({'type':  'if-then',
                                       'start': start,
                                       'end':   rtarget})
                self.return_end_ifs.add(pre[rtarget])
        elif op in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP):
            target = self.get_target(pos, op)
            if target > pos:
                unop_target = self.last_instr(pos, target, JF, target)
                if unop_target and code[unop_target+3] != ROT_TWO:
                    self.fixed_jumps[pos] = unop_target
                else:
                    self.fixed_jumps[pos] = self.restrict_to_parent(target, parent)
    def find_jump_targets(self, code):
        """
        Detect all offsets in a byte code which are jump targets.
        Return the list of offsets.
        This procedure is modelled after dis.findlables(), but here
        for each target the number of jumps are counted.

        Returns a dict mapping each target offset to the list of
        instruction offsets that jump to it.  Also (re)initializes the
        per-code analysis state: self.structs, self.loops,
        self.fixed_jumps, self.ignore_if, self.stmts/next_stmt,
        self.not_continue and self.return_end_ifs.
        """
        hasjrel = dis.hasjrel
        hasjabs = dis.hasjabs
        n = len(code)
        self.structs = [{'type': 'root',
                         'start': 0,
                         'end': n-1}]
        self.loops = [] ## All loop entry points
        self.fixed_jumps = {} ## Map fixed jumps to their real destination
        self.ignore_if = set()
        self.build_stmt_indices()
        self.not_continue = set()
        self.return_end_ifs = set()
        targets = {}
        for i in self.op_range(0, n):
            op = code[i]
            ## Determine structures and fix jumps for 2.3+
            self.detect_structure(i, op)
            if op >= HAVE_ARGUMENT:
                label = self.fixed_jumps.get(i)
                oparg = code[i+1] + code[i+2] * 256
                if label is None:
                    if op in hasjrel and op != FOR_ITER:
                        label = i + 3 + oparg
                    elif op in hasjabs:
                        # only forward *-OR_POP absolute jumps are recorded
                        # here; the rest were handled by detect_structure
                        if op in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP):
                            if (oparg > i):
                                label = oparg
                if label is not None and label != -1:
                    targets[label] = targets.get(label, []) + [i]
            elif op == END_FINALLY and i in self.fixed_jumps:
                label = self.fixed_jumps[i]
                targets[label] = targets.get(label, []) + [i]
        return targets

1418
uncompyle2/Walker.py Executable file

File diff suppressed because it is too large Load Diff

232
uncompyle2/__init__.py Executable file
View File

@@ -0,0 +1,232 @@
# Copyright (c) 1999 John Aycock
# Copyright (c) 2000 by hartmut Goebel <hartmut@goebel.noris.de>
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# See the file 'CHANGES' for a list of changes
#
# NB. This is not a masterpiece of software, but became more like a hack.
# Probably a complete rewrite would be sensefull. hG/2000-12-27
#
import sys, types, os
import Walker, verify, magics

# Deeply nested ASTs from large code objects can exceed the default limit.
sys.setrecursionlimit(5000)

# Public API.  BUG FIX: 'uncompyle_file' was listed twice; the intended
# 'uncompyle' export was missing.
__all__ = ['uncompyle', 'uncompyle_file', 'main']
def _load_file(filename):
    """
    load a Python source file and compile it to byte-code
    _load_file(filename: string): code_object
    filename: name of file containing Python source code
    (normally a .py)
    code_object: code_object compiled from this source code
    This function does NOT write any file!
    """
    # 'with' guarantees the handle is closed even when compile() raises;
    # the original leaked the file object on a SyntaxError.
    with open(filename, 'rb') as fp:
        source = fp.read() + b'\n'  # make sure the source ends in a newline
    try:
        co = compile(source, filename, 'exec')
    except SyntaxError:
        sys.stderr.write('>>Syntax error in %s\n' % filename)
        raise
    return co
def _load_module(filename):
    """
    load a module without importing it
    _load_module(filename: string): (version, code_object)
    filename: name of file containing Python byte-code object
    (normally a .pyc)
    code_object: code_object from this file
    """
    # 'magics' is already imported at module level; only marshal is needed.
    import marshal
    # 'with' closes the file on every path; the original leaked the handle
    # when one of the ImportErrors was raised.
    with open(filename, 'rb') as fp:
        magic = fp.read(4)
        try:
            version = float(magics.versions[magic])
        except KeyError:
            raise ImportError("Unknown magic number %s in %s" % (ord(magic[0])+256*ord(magic[1]), filename))
        if (version > 2.7) or (version < 2.5):
            raise ImportError("This is a Python %s file! Only Python 2.5 to 2.7 files are supported." % version)
        #print version
        fp.read(4) # skip the timestamp
        co = marshal.load(fp)
    return version, co
def uncompyle(version, co, out=None, showasm=0, showast=0):
    """
    diassembles a given code block 'co'

    version:  float byte-code version (2.5, 2.6 or 2.7); selects the
              scanner module below
    co:       the code object to decompile
    out:      output stream (defaults to sys.stdout)
    showasm/showast: debug switches dumping the token stream / the AST
    """
    assert type(co) == types.CodeType
    # store final output stream for case of error
    __real_out = out or sys.stdout
    if co.co_filename:
        print >>__real_out, '#Embedded file name: %s' % co.co_filename
    # diff scanner
    # NOTE(review): an unsupported version value leaves 'scan' unbound
    # (NameError); _load_module() guards the 2.5-2.7 range upstream.
    if version == 2.7:
        import Scanner27 as scan
    elif version == 2.6:
        import Scanner26 as scan
    elif version == 2.5:
        import Scanner25 as scan
    scanner = scan.Scanner(version)
    scanner.setShowAsm(showasm, out)
    tokens, customize = scanner.disassemble(co)
    #sys.exit(0)
    # Build AST from disassembly.
    walker = Walker.Walker(out, scanner, showast=showast)
    try:
        ast = walker.build_ast(tokens, customize)
    except Walker.ParserError, e : # parser failed, dump disassembly
        print >>__real_out, e
        raise
    del tokens # save memory
    # convert leading '__doc__ = "..." into doc string
    assert ast == 'stmts'
    try:
        if ast[0][0] == Walker.ASSIGN_DOC_STRING(co.co_consts[0]):
            walker.print_docstring('', co.co_consts[0])
            del ast[0]
        if ast[-1] == Walker.RETURN_NONE:
            ast.pop() # remove last node
            #todo: if empty, add 'pass'
    except:
        pass
    walker.mod_globs = Walker.find_globals(ast, set())
    walker.gen_source(ast, customize)
    # names assigned but never read end up as globals; flag them
    for g in walker.mod_globs:
        walker.write('global %s ## Warning: Unused global\n' % g)
    if walker.ERROR:
        raise walker.ERROR
def uncompyle_file(filename, outstream=None, showasm=0, showast=0):
    """
    decompile Python byte-code file (.pyc) to outstream
    """
    version, code_obj = _load_module(filename)
    uncompyle(version, code_obj, outstream, showasm, showast)
    # drop the reference to the (possibly large) code object
    code_obj = None
#---- main -------

if sys.platform.startswith('linux') and os.uname()[2][:2] == '2.':
    def __memUsage():
        # Field index 22 of /proc/self/stat is read here -- presumably the
        # virtual memory size in bytes (reported in MB); verify against
        # proc(5) if this number matters.
        mi = open('/proc/self/stat', 'r')
        mu = mi.readline().split()[22]
        mi.close()
        return int(mu) / 1000000
else:
    def __memUsage():
        # Memory reporting is only implemented for Linux 2.x kernels.
        return ''
def main(in_base, out_base, files, codes, outfile=None,
showasm=0, showast=0, do_verify=0):
"""
in_base base directory for input files
out_base base directory for output files (ignored when
files list of filenames to be uncompyled (relative to src_base)
outfile write output to this filename (overwrites out_base)
For redirecting output to
- <filename> outfile=<filename> (out_base is ignored)
- files below out_base out_base=...
- stdout out_base=None, outfile=None
"""
def _get_outstream(outfile):
dir = os.path.dirname(outfile)
failed_file = outfile + '_failed'
if os.path.exists(failed_file): os.remove(failed_file)
try:
os.makedirs(dir)
except OSError:
pass
return open(outfile, 'w')
of = outfile
tot_files = okay_files = failed_files = verify_failed_files = 0
for code in codes:
version = sys.version[:3] # "2.5"
with open(code, "r") as f:
co = compile(f.read(), "", "exec")
uncompyle(sys.version[:3], co, sys.stdout, showasm=showasm, showast=showast)
for file in files:
infile = os.path.join(in_base, file)
#print >>sys.stderr, infile
if of: # outfile was given as parameter
outstream = _get_outstream(outfile)
elif out_base is None:
outstream = sys.stdout
else:
outfile = os.path.join(out_base, file) + '_dis'
outstream = _get_outstream(outfile)
#print >>sys.stderr, outfile
# try to decomyple the input file
try:
uncompyle_file(infile, outstream, showasm, showast)
tot_files += 1
except KeyboardInterrupt:
if outfile:
outstream.close()
os.remove(outfile)
raise
except:
failed_files += 1
sys.stderr.write("### Can't uncompyle %s\n" % infile)
if outfile:
outstream.close()
os.rename(outfile, outfile + '_failed')
import traceback
traceback.print_exc()
#raise
else: # uncompyle successfull
if outfile:
outstream.close()
if do_verify:
try:
verify.compare_code_with_srcfile(infile, outfile)
print "+++ okay decompyling", infile, __memUsage()
okay_files += 1
except verify.VerifyCmpError, e:
verify_failed_files += 1
os.rename(outfile, outfile + '_unverified')
print >>sys.stderr, "### Error Verifiying", file
print >>sys.stderr, e
else:
okay_files += 1
print "+++ okay decompyling", infile, __memUsage()
return (tot_files, okay_files, failed_files, verify_failed_files)

238
uncompyle2/disas.py Executable file
View File

@@ -0,0 +1,238 @@
"""Disassembler of Python byte code into mnemonics."""
import sys
import types
_have_code = (types.MethodType, types.FunctionType, types.CodeType, types.ClassType, type)
def dis(x=None):
    """Disassemble classes, methods, functions, or code.
    With no argument, disassemble the last traceback.
    """
    if x is None:
        distb()
        return
    if isinstance(x, types.InstanceType):
        # old-style instance: disassemble its class
        x = x.__class__
    if hasattr(x, 'im_func'):
        # bound/unbound method: unwrap to the underlying function
        x = x.im_func
    if hasattr(x, 'func_code'):
        # function: unwrap to its code object
        x = x.func_code
    if hasattr(x, '__dict__'):
        # class or module: disassemble every code-bearing member
        items = x.__dict__.items()
        items.sort()
        for name, x1 in items:
            if isinstance(x1, _have_code):
                print "Disassembly of %s:" % name
                try:
                    dis(x1)
                except TypeError, msg:
                    print "Sorry:", msg
                print
    elif hasattr(x, 'co_code'):
        disassemble(x)
    elif isinstance(x, str):
        # raw byte string: disassemble without name/const context
        disassemble_string(x)
    else:
        raise TypeError, \
              "don't know how to disassemble %s objects" % \
              type(x).__name__
def distb(tb=None):
    """Disassemble a traceback (default: last traceback)."""
    if tb is None:
        try:
            tb = sys.last_traceback
        except AttributeError:
            raise RuntimeError("no last traceback to disassemble")
    # walk to the innermost frame of the traceback
    while tb.tb_next:
        tb = tb.tb_next
    disassemble(tb.tb_frame.f_code, tb.tb_lasti)
def disassemble(co, lasti=-1):
    """Disassemble a code object.

    lasti: offset of the most recently executed instruction (marked
           with '-->'), or -1 for none.
    """
    code = co.co_code
    labels = findlabels(code)
    linestarts = dict(findlinestarts(co))
    n = len(code)
    i = 0
    extended_arg = 0
    free = None   # cell+free variable names, resolved lazily
    while i < n:
        c = code[i]
        op = ord(c)
        # left-hand columns: line number, '-->' marker, '>>' label marker
        if i in linestarts:
            if i > 0:
                print
            print "%3d" % linestarts[i],
        else:
            print '  ',
        if i == lasti: print '-->',
        else: print '   ',
        if i in labels: print '>>',
        else: print '  ',
        print repr(i).rjust(4),
        print opname[op].ljust(20),
        i = i+1
        if op >= HAVE_ARGUMENT:
            oparg = ord(code[i]) + ord(code[i+1])*256 + extended_arg
            extended_arg = 0
            i = i+2
            if op == EXTENDED_ARG:
                # widen the next instruction's argument by 16 bits
                extended_arg = oparg*65536L
            print repr(oparg).rjust(5),
            # decode the argument according to the opcode category
            if op in hasconst:
                print '(' + repr(co.co_consts[oparg]) + ')',
            elif op in hasname:
                print '(' + co.co_names[oparg] + ')',
            elif op in hasjrel:
                print '(to ' + repr(i + oparg) + ')',
            elif op in haslocal:
                print '(' + co.co_varnames[oparg] + ')',
            elif op in hascompare:
                print '(' + cmp_op[oparg] + ')',
            elif op in hasfree:
                if free is None:
                    free = co.co_cellvars + co.co_freevars
                print '(' + free[oparg] + ')',
        print
def disassemble_string(code, lasti=-1, varnames=None, names=None,
                       constants=None):
    """Disassemble a raw byte string.

    Without a code object there is no line-number info, and names/
    constants are only shown when the optional lookup sequences are
    supplied; otherwise the bare argument index is printed.
    """
    labels = findlabels(code)
    n = len(code)
    i = 0
    while i < n:
        c = code[i]
        op = ord(c)
        if i == lasti: print '-->',
        else: print '   ',
        if i in labels: print '>>',
        else: print '  ',
        print repr(i).rjust(4),
        print opname[op].ljust(15),
        i = i+1
        if op >= HAVE_ARGUMENT:
            oparg = ord(code[i]) + ord(code[i+1])*256
            i = i+2
            print repr(oparg).rjust(5),
            # decode the argument when the matching lookup table was given
            if op in hasconst:
                if constants:
                    print '(' + repr(constants[oparg]) + ')',
                else:
                    print '(%d)'%oparg,
            elif op in hasname:
                if names is not None:
                    print '(' + names[oparg] + ')',
                else:
                    print '(%d)'%oparg,
            elif op in hasjrel:
                print '(to ' + repr(i + oparg) + ')',
            elif op in haslocal:
                if varnames:
                    print '(' + varnames[oparg] + ')',
                else:
                    print '(%d)' % oparg,
            elif op in hascompare:
                print '(' + cmp_op[oparg] + ')',
        print
disco = disassemble # XXX For backwards compatibility
def findlabels(code):
    """Detect all offsets in a byte code which are jump targets.
    Return the list of offsets.
    """
    labels = []
    offset, n = 0, len(code)
    while offset < n:
        op = ord(code[offset])
        offset += 1
        if op >= HAVE_ARGUMENT:
            oparg = ord(code[offset]) + ord(code[offset+1]) * 256
            offset += 2
            if op in hasjrel:
                # relative jump: target counts from the next instruction
                label = offset + oparg
            elif op in hasjabs:
                label = oparg
            else:
                label = -1
            if label >= 0 and label not in labels:
                labels.append(label)
    return labels
def findlinestarts(code):
    """Find the offsets in a byte code which are start of lines in the source.
    Generate pairs (offset, lineno) as described in Python/compile.c.
    """
    lnotab = code.co_lnotab
    byte_increments = [ord(ch) for ch in lnotab[0::2]]
    line_increments = [ord(ch) for ch in lnotab[1::2]]
    lastlineno = None
    addr, lineno = 0, code.co_firstlineno
    for byte_incr, line_incr in zip(byte_increments, line_increments):
        if byte_incr:
            # a new bytecode range begins: emit the pending line once
            if lineno != lastlineno:
                yield (addr, lineno)
                lastlineno = lineno
            addr += byte_incr
        lineno += line_incr
    if lineno != lastlineno:
        yield (addr, lineno)
def setVersion(version):
    """Load the opcode tables for the given byte-code version (2.5, 2.6
    or 2.7) into this module's globals, so the disassembly helpers above
    see the right opname/opmap/has* tables."""
    if version == 2.7:
        import uncompyle2.opcode.opcode_27 as opcodyn
    elif version == 2.6:
        import uncompyle2.opcode.opcode_26 as opcodyn
    elif version == 2.5:
        import uncompyle2.opcode.opcode_25 as opcodyn
    for attr in ('cmp_op', 'hasconst', 'hasname', 'hasjrel', 'hasjabs',
                 'haslocal', 'hascompare', 'hasfree', 'opname', 'opmap',
                 'HAVE_ARGUMENT', 'EXTENDED_ARG'):
        globals()[attr] = getattr(opcodyn, attr)
def _test():
    """Simple test program to disassemble a file."""
    fn = None
    if sys.argv[1:]:
        if sys.argv[2:]:
            sys.stderr.write("usage: python dis.py [-|file]\n")
            sys.exit(2)
        fn = sys.argv[1]
        # '-' (or an empty name) means read from stdin
        if not fn or fn == "-":
            fn = None
    if fn is None:
        f = sys.stdin
    else:
        f = open(fn)
    source = f.read()
    if fn is not None:
        f.close()
    else:
        fn = "<stdin>"
    code = compile(source, fn, "exec")
    dis(code)
if __name__ == "__main__":
_test()

66
uncompyle2/magics.py Executable file
View File

@@ -0,0 +1,66 @@
import struct
__all__ = ['magics', 'versions']
def __build_magic(magic):
    """Return the 4-byte on-disk form of a .pyc magic number: the
    little-endian short followed by '\\r\\n'."""
    header = struct.pack('Hcc', magic, '\r', '\n')
    return header
def __by_version(magics):
    """Invert a {magic: version} mapping into {version: magic}."""
    return dict((version, magic) for magic, version in magics.items())
# Map the 4-byte .pyc magic string to the Python version that wrote it.
versions = {
    # taken from Python/import.c
    # magic, version
    __build_magic(20121): '1.5', #1.5, 1.5.1, 1.5.2
    __build_magic(50428): '1.6', #1.6
    __build_magic(50823): '2.0', #2.0, 2.0.1
    __build_magic(60202): '2.1', #2.1, 2.1.1, 2.1.2
    __build_magic(60717): '2.2', #2.2
    __build_magic(62011): '2.3', #2.3a0
    __build_magic(62021): '2.3', #2.3a0
    __build_magic(62041): '2.4', #2.4a0
    __build_magic(62051): '2.4', #2.4a3
    __build_magic(62061): '2.4', #2.4b1
    __build_magic(62071): '2.5', #2.5a0
    __build_magic(62081): '2.5', #2.5a0 (ast-branch)
    __build_magic(62091): '2.5', #2.5a0 (with)
    __build_magic(62092): '2.5', #2.5a0 (changed WITH_CLEANUP opcode)
    __build_magic(62101): '2.5', #2.5b3 (fix wrong code: for x, in ...)
    __build_magic(62111): '2.5', #2.5b3 (fix wrong code: x += yield)
    __build_magic(62121): '2.5', #2.5c1 (fix wrong lnotab with for loops and
                                 # storing constants that should have been removed)
    __build_magic(62131): '2.5', #2.5c2 (fix wrong code: for x, in ... in listcomp/genexp)
    __build_magic(62151): '2.6', #2.6a0 (peephole optimizations & STORE_MAP)
    __build_magic(62161): '2.6', #2.6a1 (WITH_CLEANUP optimization)
    __build_magic(62171): '2.7', #2.7a0 (optimize list comprehensions/change LIST_APPEND)
    __build_magic(62181): '2.7', #2.7a0 (optimize conditional branches:
                                 # introduce POP_JUMP_IF_FALSE and POP_JUMP_IF_TRUE)
    __build_magic(62191): '2.7', #2.7a0 (introduce SETUP_WITH)
    __build_magic(62201): '2.7', #2.7a0 (introduce BUILD_SET)
    __build_magic(62211): '2.7'  #2.7a0 (introduce MAP_ADD and SET_ADD)
}

# Inverse mapping: version string -> magic string.
magics = __by_version(versions)
def __show(text, magic):
print text, struct.unpack('BBBB', magic), \
struct.unpack('HBB', magic)
def test():
import imp
magic_20 = by_version['2.0']
current = imp.get_magic()
current_version = magics[current]
magic_current = by_version[ current_version ]
print type(magic_20), len(magic_20), repr(magic_20)
print
print 'This Python interpreter has version', current_version
__show('imp.get_magic():\t', current),
__show('magic[current_version]:\t', magic_current)
__show('magic_20:\t\t', magic_20)
if __name__ == '__main__':
test()

0
uncompyle2/opcode/__init__.py Executable file
View File

188
uncompyle2/opcode/opcode_23.py Executable file
View File

@@ -0,0 +1,188 @@
"""
opcode module - potentially shared between dis and other modules which
operate on bytecodes (e.g. peephole optimizers).
"""
__all__ = ["cmp_op", "hasconst", "hasname", "hasjrel", "hasjabs",
"haslocal", "hascompare", "hasfree", "opname", "opmap",
"HAVE_ARGUMENT", "EXTENDED_ARG"]
# Tuple indexed by the argument of COMPARE_OP.
cmp_op = ('<', '<=', '==', '!=', '>', '>=', 'in', 'not in', 'is',
        'is not', 'exception match', 'BAD')

# Per-category opcode lists, filled in by def_op/name_op/jrel_op/jabs_op.
hasconst = []   # argument is an index into co_consts
hasname = []    # argument is an index into co_names
hasjrel = []    # argument is a relative jump offset
hasjabs = []    # argument is an absolute jump target
haslocal = []   # argument is an index into co_varnames
hascompare = [] # argument selects an entry of cmp_op
hasfree = []    # argument indexes cell/free variables

opmap = {}          # opcode name -> numeric opcode
opname = [''] * 256 # numeric opcode -> name; unknown codes show as '<NN>'
for op in range(256): opname[op] = '<' + `op` + '>'
del op
def def_op(name, op):
    """Register *name* for opcode *op* in both lookup tables."""
    opmap[name] = op
    opname[op] = name
def name_op(name, op):
    """Register an opcode whose argument is an index into co_names."""
    hasname.append(op)
    def_op(name, op)
def jrel_op(name, op):
    """Register an opcode whose argument is a relative jump offset."""
    hasjrel.append(op)
    def_op(name, op)
def jabs_op(name, op):
def_op(name, op)
hasjabs.append(op)
# ---------------------------------------------------------------------------
# Opcode table for Python 2.3 bytecode.  Each call registers a mnemonic; the
# name_op/jrel_op/jabs_op variants also record the opcode in the matching
# category list (hasname/hasjrel/hasjabs).
# ---------------------------------------------------------------------------
# Instruction opcodes for compiled code
def_op('STOP_CODE', 0)
def_op('POP_TOP', 1)
def_op('ROT_TWO', 2)
def_op('ROT_THREE', 3)
def_op('DUP_TOP', 4)
def_op('ROT_FOUR', 5)
def_op('UNARY_POSITIVE', 10)
def_op('UNARY_NEGATIVE', 11)
def_op('UNARY_NOT', 12)
def_op('UNARY_CONVERT', 13)
def_op('UNARY_INVERT', 15)
def_op('BINARY_POWER', 19)
def_op('BINARY_MULTIPLY', 20)
def_op('BINARY_DIVIDE', 21)
def_op('BINARY_MODULO', 22)
def_op('BINARY_ADD', 23)
def_op('BINARY_SUBTRACT', 24)
def_op('BINARY_SUBSCR', 25)
def_op('BINARY_FLOOR_DIVIDE', 26)
def_op('BINARY_TRUE_DIVIDE', 27)
def_op('INPLACE_FLOOR_DIVIDE', 28)
def_op('INPLACE_TRUE_DIVIDE', 29)
def_op('SLICE+0', 30)
def_op('SLICE+1', 31)
def_op('SLICE+2', 32)
def_op('SLICE+3', 33)
def_op('STORE_SLICE+0', 40)
def_op('STORE_SLICE+1', 41)
def_op('STORE_SLICE+2', 42)
def_op('STORE_SLICE+3', 43)
def_op('DELETE_SLICE+0', 50)
def_op('DELETE_SLICE+1', 51)
def_op('DELETE_SLICE+2', 52)
def_op('DELETE_SLICE+3', 53)
def_op('INPLACE_ADD', 55)
def_op('INPLACE_SUBTRACT', 56)
def_op('INPLACE_MULTIPLY', 57)
def_op('INPLACE_DIVIDE', 58)
def_op('INPLACE_MODULO', 59)
def_op('STORE_SUBSCR', 60)
def_op('DELETE_SUBSCR', 61)
def_op('BINARY_LSHIFT', 62)
def_op('BINARY_RSHIFT', 63)
def_op('BINARY_AND', 64)
def_op('BINARY_XOR', 65)
def_op('BINARY_OR', 66)
def_op('INPLACE_POWER', 67)
def_op('GET_ITER', 68)
def_op('PRINT_EXPR', 70)
def_op('PRINT_ITEM', 71)
def_op('PRINT_NEWLINE', 72)
def_op('PRINT_ITEM_TO', 73)
def_op('PRINT_NEWLINE_TO', 74)
def_op('INPLACE_LSHIFT', 75)
def_op('INPLACE_RSHIFT', 76)
def_op('INPLACE_AND', 77)
def_op('INPLACE_XOR', 78)
def_op('INPLACE_OR', 79)
def_op('BREAK_LOOP', 80)
def_op('LOAD_LOCALS', 82)
def_op('RETURN_VALUE', 83)
def_op('IMPORT_STAR', 84)
def_op('EXEC_STMT', 85)
def_op('YIELD_VALUE', 86)
def_op('POP_BLOCK', 87)
def_op('END_FINALLY', 88)
def_op('BUILD_CLASS', 89)
HAVE_ARGUMENT = 90              # Opcodes from here have an argument:
name_op('STORE_NAME', 90)       # Index in name list
name_op('DELETE_NAME', 91)      # ""
def_op('UNPACK_SEQUENCE', 92)   # Number of tuple items
jrel_op('FOR_ITER', 93)
name_op('STORE_ATTR', 95)       # Index in name list
name_op('DELETE_ATTR', 96)      # ""
name_op('STORE_GLOBAL', 97)     # ""
name_op('DELETE_GLOBAL', 98)    # ""
def_op('DUP_TOPX', 99)          # number of items to duplicate
def_op('LOAD_CONST', 100)       # Index in const list
hasconst.append(100)
name_op('LOAD_NAME', 101)       # Index in name list
def_op('BUILD_TUPLE', 102)      # Number of tuple items
def_op('BUILD_LIST', 103)       # Number of list items
def_op('BUILD_MAP', 104)        # Always zero for now
name_op('LOAD_ATTR', 105)       # Index in name list
def_op('COMPARE_OP', 106)       # Comparison operator
hascompare.append(106)
name_op('IMPORT_NAME', 107)     # Index in name list
name_op('IMPORT_FROM', 108)     # Index in name list
jrel_op('JUMP_FORWARD', 110)    # Number of bytes to skip
jrel_op('JUMP_IF_FALSE', 111)   # ""
jrel_op('JUMP_IF_TRUE', 112)    # ""
jabs_op('JUMP_ABSOLUTE', 113)   # Target byte offset from beginning of code
name_op('LOAD_GLOBAL', 116)     # Index in name list
jabs_op('CONTINUE_LOOP', 119)   # Target address
jrel_op('SETUP_LOOP', 120)      # Distance to target address
jrel_op('SETUP_EXCEPT', 121)    # ""
jrel_op('SETUP_FINALLY', 122)   # ""
def_op('LOAD_FAST', 124)        # Local variable number
haslocal.append(124)
def_op('STORE_FAST', 125)       # Local variable number
haslocal.append(125)
def_op('DELETE_FAST', 126)      # Local variable number
haslocal.append(126)
def_op('RAISE_VARARGS', 130)    # Number of raise arguments (1, 2, or 3)
def_op('CALL_FUNCTION', 131)    # #args + (#kwargs << 8)
def_op('MAKE_FUNCTION', 132)    # Number of args with default values
def_op('BUILD_SLICE', 133)      # Number of items
def_op('MAKE_CLOSURE', 134)
def_op('LOAD_CLOSURE', 135)
hasfree.append(135)
def_op('LOAD_DEREF', 136)
hasfree.append(136)
def_op('STORE_DEREF', 137)
hasfree.append(137)
def_op('CALL_FUNCTION_VAR', 140)     # #args + (#kwargs << 8)
def_op('CALL_FUNCTION_KW', 141)      # #args + (#kwargs << 8)
def_op('CALL_FUNCTION_VAR_KW', 142)  # #args + (#kwargs << 8)
def_op('EXTENDED_ARG', 143)
EXTENDED_ARG = 143
# The registration helpers are only needed at module build time.
del def_op, name_op, jrel_op, jabs_op

190
uncompyle2/opcode/opcode_24.py Executable file
View File

@@ -0,0 +1,190 @@
"""
opcode module - potentially shared between dis and other modules which
operate on bytecodes (e.g. peephole optimizers).
"""
__all__ = ["cmp_op", "hasconst", "hasname", "hasjrel", "hasjabs",
           "haslocal", "hascompare", "hasfree", "opname", "opmap",
           "HAVE_ARGUMENT", "EXTENDED_ARG"]

# Operand values for COMPARE_OP, indexed by the oparg.
cmp_op = ('<', '<=', '==', '!=', '>', '>=', 'in', 'not in', 'is',
          'is not', 'exception match', 'BAD')

# Category lists, populated by the registration helpers below.
hasconst, hasname, hasjrel, hasjabs = [], [], [], []
haslocal, hascompare, hasfree = [], [], []

opmap = {}           # mnemonic -> opcode number
opname = [''] * 256  # opcode number -> mnemonic ('<n>' while undefined)
for op in range(256):
    opname[op] = '<%r>' % (op,)
del op

def def_op(name, op):
    """Register *name* as the mnemonic for opcode number *op*."""
    opmap[name] = op
    opname[op] = name

def name_op(name, op):
    """Register an opcode whose argument is an index into the names list."""
    def_op(name, op)
    hasname.append(op)

def jrel_op(name, op):
    """Register an opcode whose argument is a relative jump offset."""
    def_op(name, op)
    hasjrel.append(op)

def jabs_op(name, op):
    """Register an opcode whose argument is an absolute jump target."""
    def_op(name, op)
    hasjabs.append(op)
# ---------------------------------------------------------------------------
# Opcode table for Python 2.4 bytecode (adds NOP and LIST_APPEND vs. 2.3).
# ---------------------------------------------------------------------------
# Instruction opcodes for compiled code
def_op('STOP_CODE', 0)
def_op('POP_TOP', 1)
def_op('ROT_TWO', 2)
def_op('ROT_THREE', 3)
def_op('DUP_TOP', 4)
def_op('ROT_FOUR', 5)
def_op('NOP', 9)
def_op('UNARY_POSITIVE', 10)
def_op('UNARY_NEGATIVE', 11)
def_op('UNARY_NOT', 12)
def_op('UNARY_CONVERT', 13)
def_op('UNARY_INVERT', 15)
def_op('LIST_APPEND', 18)
def_op('BINARY_POWER', 19)
def_op('BINARY_MULTIPLY', 20)
def_op('BINARY_DIVIDE', 21)
def_op('BINARY_MODULO', 22)
def_op('BINARY_ADD', 23)
def_op('BINARY_SUBTRACT', 24)
def_op('BINARY_SUBSCR', 25)
def_op('BINARY_FLOOR_DIVIDE', 26)
def_op('BINARY_TRUE_DIVIDE', 27)
def_op('INPLACE_FLOOR_DIVIDE', 28)
def_op('INPLACE_TRUE_DIVIDE', 29)
def_op('SLICE+0', 30)
def_op('SLICE+1', 31)
def_op('SLICE+2', 32)
def_op('SLICE+3', 33)
def_op('STORE_SLICE+0', 40)
def_op('STORE_SLICE+1', 41)
def_op('STORE_SLICE+2', 42)
def_op('STORE_SLICE+3', 43)
def_op('DELETE_SLICE+0', 50)
def_op('DELETE_SLICE+1', 51)
def_op('DELETE_SLICE+2', 52)
def_op('DELETE_SLICE+3', 53)
def_op('INPLACE_ADD', 55)
def_op('INPLACE_SUBTRACT', 56)
def_op('INPLACE_MULTIPLY', 57)
def_op('INPLACE_DIVIDE', 58)
def_op('INPLACE_MODULO', 59)
def_op('STORE_SUBSCR', 60)
def_op('DELETE_SUBSCR', 61)
def_op('BINARY_LSHIFT', 62)
def_op('BINARY_RSHIFT', 63)
def_op('BINARY_AND', 64)
def_op('BINARY_XOR', 65)
def_op('BINARY_OR', 66)
def_op('INPLACE_POWER', 67)
def_op('GET_ITER', 68)
def_op('PRINT_EXPR', 70)
def_op('PRINT_ITEM', 71)
def_op('PRINT_NEWLINE', 72)
def_op('PRINT_ITEM_TO', 73)
def_op('PRINT_NEWLINE_TO', 74)
def_op('INPLACE_LSHIFT', 75)
def_op('INPLACE_RSHIFT', 76)
def_op('INPLACE_AND', 77)
def_op('INPLACE_XOR', 78)
def_op('INPLACE_OR', 79)
def_op('BREAK_LOOP', 80)
def_op('LOAD_LOCALS', 82)
def_op('RETURN_VALUE', 83)
def_op('IMPORT_STAR', 84)
def_op('EXEC_STMT', 85)
def_op('YIELD_VALUE', 86)
def_op('POP_BLOCK', 87)
def_op('END_FINALLY', 88)
def_op('BUILD_CLASS', 89)
HAVE_ARGUMENT = 90              # Opcodes from here have an argument:
name_op('STORE_NAME', 90)       # Index in name list
name_op('DELETE_NAME', 91)      # ""
def_op('UNPACK_SEQUENCE', 92)   # Number of tuple items
jrel_op('FOR_ITER', 93)
name_op('STORE_ATTR', 95)       # Index in name list
name_op('DELETE_ATTR', 96)      # ""
name_op('STORE_GLOBAL', 97)     # ""
name_op('DELETE_GLOBAL', 98)    # ""
def_op('DUP_TOPX', 99)          # number of items to duplicate
def_op('LOAD_CONST', 100)       # Index in const list
hasconst.append(100)
name_op('LOAD_NAME', 101)       # Index in name list
def_op('BUILD_TUPLE', 102)      # Number of tuple items
def_op('BUILD_LIST', 103)       # Number of list items
def_op('BUILD_MAP', 104)        # Always zero for now
name_op('LOAD_ATTR', 105)       # Index in name list
def_op('COMPARE_OP', 106)       # Comparison operator
hascompare.append(106)
name_op('IMPORT_NAME', 107)     # Index in name list
name_op('IMPORT_FROM', 108)     # Index in name list
jrel_op('JUMP_FORWARD', 110)    # Number of bytes to skip
jrel_op('JUMP_IF_FALSE', 111)   # ""
jrel_op('JUMP_IF_TRUE', 112)    # ""
jabs_op('JUMP_ABSOLUTE', 113)   # Target byte offset from beginning of code
name_op('LOAD_GLOBAL', 116)     # Index in name list
jabs_op('CONTINUE_LOOP', 119)   # Target address
jrel_op('SETUP_LOOP', 120)      # Distance to target address
jrel_op('SETUP_EXCEPT', 121)    # ""
jrel_op('SETUP_FINALLY', 122)   # ""
def_op('LOAD_FAST', 124)        # Local variable number
haslocal.append(124)
def_op('STORE_FAST', 125)       # Local variable number
haslocal.append(125)
def_op('DELETE_FAST', 126)      # Local variable number
haslocal.append(126)
def_op('RAISE_VARARGS', 130)    # Number of raise arguments (1, 2, or 3)
def_op('CALL_FUNCTION', 131)    # #args + (#kwargs << 8)
def_op('MAKE_FUNCTION', 132)    # Number of args with default values
def_op('BUILD_SLICE', 133)      # Number of items
def_op('MAKE_CLOSURE', 134)
def_op('LOAD_CLOSURE', 135)
hasfree.append(135)
def_op('LOAD_DEREF', 136)
hasfree.append(136)
def_op('STORE_DEREF', 137)
hasfree.append(137)
def_op('CALL_FUNCTION_VAR', 140)     # #args + (#kwargs << 8)
def_op('CALL_FUNCTION_KW', 141)      # #args + (#kwargs << 8)
def_op('CALL_FUNCTION_VAR_KW', 142)  # #args + (#kwargs << 8)
def_op('EXTENDED_ARG', 143)
EXTENDED_ARG = 143
# The registration helpers are only needed at module build time.
del def_op, name_op, jrel_op, jabs_op

185
uncompyle2/opcode/opcode_25.py Executable file
View File

@@ -0,0 +1,185 @@
"""
opcode module - potentially shared between dis and other modules which
operate on bytecodes (e.g. peephole optimizers).
"""
__all__ = ["cmp_op", "hasconst", "hasname", "hasjrel", "hasjabs",
           "haslocal", "hascompare", "hasfree", "opname", "opmap",
           "HAVE_ARGUMENT", "EXTENDED_ARG"]

# Operand values for COMPARE_OP, indexed by the oparg.
cmp_op = ('<', '<=', '==', '!=', '>', '>=', 'in', 'not in', 'is',
          'is not', 'exception match', 'BAD')

# Category lists, populated by the registration helpers below.
hasconst, hasname, hasjrel, hasjabs = [], [], [], []
haslocal, hascompare, hasfree = [], [], []

opmap = {}           # mnemonic -> opcode number
opname = [''] * 256  # opcode number -> mnemonic ('<n>' while undefined)
for op in range(256):
    opname[op] = '<%r>' % (op,)
del op

def def_op(name, op):
    """Register *name* as the mnemonic for opcode number *op*."""
    opmap[name] = op
    opname[op] = name

def name_op(name, op):
    """Register an opcode whose argument is an index into the names list."""
    def_op(name, op)
    hasname.append(op)

def jrel_op(name, op):
    """Register an opcode whose argument is a relative jump offset."""
    def_op(name, op)
    hasjrel.append(op)

def jabs_op(name, op):
    """Register an opcode whose argument is an absolute jump target."""
    def_op(name, op)
    hasjabs.append(op)
# ---------------------------------------------------------------------------
# Opcode table for Python 2.5 bytecode (adds WITH_CLEANUP vs. 2.4).
# NOTE(review): the trailing "# NN" numbers on each line look like an
# alternative (remapped) opcode numbering; their purpose is not evident from
# this file -- confirm against the translation layer before relying on them.
# ---------------------------------------------------------------------------
# Instruction opcodes for compiled code
# Blank lines correspond to available opcodes
def_op('STOP_CODE', 0)            # 0
def_op('POP_TOP', 1)              # 15
def_op('ROT_TWO', 2)              # 59
def_op('ROT_THREE', 3)            # 60
def_op('DUP_TOP', 4)              # 13
def_op('ROT_FOUR', 5)             # 49
def_op('NOP', 9)                  # 53
def_op('UNARY_POSITIVE', 10)      # 48
def_op('UNARY_NEGATIVE', 11)      # 54
def_op('UNARY_NOT', 12)           # 38
def_op('UNARY_CONVERT', 13)       # 25
def_op('UNARY_INVERT', 15)        # 34
def_op('LIST_APPEND', 18)         # 68
def_op('BINARY_POWER', 19)        # 28
def_op('BINARY_MULTIPLY', 20)     # 36
def_op('BINARY_DIVIDE', 21)       # 12
def_op('BINARY_MODULO', 22)       # 41
def_op('BINARY_ADD', 23)          # 52
def_op('BINARY_SUBTRACT', 24)     # 55
def_op('BINARY_SUBSCR', 25)       # 4
def_op('BINARY_FLOOR_DIVIDE', 26) # 43
def_op('BINARY_TRUE_DIVIDE', 27)  # 5
def_op('INPLACE_FLOOR_DIVIDE', 28) # 32
def_op('INPLACE_TRUE_DIVIDE', 29)  # 30
def_op('SLICE+0', 30)             # 16
def_op('SLICE+1', 31)             # 17
def_op('SLICE+2', 32)             # 18
def_op('SLICE+3', 33)             # 19
def_op('STORE_SLICE+0', 40)       # 61
def_op('STORE_SLICE+1', 41)       # 62
def_op('STORE_SLICE+2', 42)       # 63
def_op('STORE_SLICE+3', 43)       # 64
def_op('DELETE_SLICE+0', 50)      # 44
def_op('DELETE_SLICE+1', 51)      # 45
def_op('DELETE_SLICE+2', 52)      # 46
def_op('DELETE_SLICE+3', 53)      # 47
def_op('INPLACE_ADD', 55)         # 6
def_op('INPLACE_SUBTRACT', 56)    # 29
def_op('INPLACE_MULTIPLY', 57)    # 8
def_op('INPLACE_DIVIDE', 58)      # 27
def_op('INPLACE_MODULO', 59)      # 3
def_op('STORE_SUBSCR', 60)        # 31
def_op('DELETE_SUBSCR', 61)       # 69
def_op('BINARY_LSHIFT', 62)       # 7
def_op('BINARY_RSHIFT', 63)       # 22
def_op('BINARY_AND', 64)          # 50
def_op('BINARY_XOR', 65)          # 21
def_op('BINARY_OR', 66)           # 2
def_op('INPLACE_POWER', 67)       # 57
def_op('GET_ITER', 68)            # 39
def_op('PRINT_EXPR', 70)          # 20
def_op('PRINT_ITEM', 71)          # 9
def_op('PRINT_NEWLINE', 72)       # 14
def_op('PRINT_ITEM_TO', 73)       # 33
def_op('PRINT_NEWLINE_TO', 74)    # 35
def_op('INPLACE_LSHIFT', 75)      # 11
def_op('INPLACE_RSHIFT', 76)      # 58
def_op('INPLACE_AND', 77)         # 24
def_op('INPLACE_XOR', 78)         # 23
def_op('INPLACE_OR', 79)          # 10
def_op('BREAK_LOOP', 80)          # 40
def_op('WITH_CLEANUP', 81)        # 37
def_op('LOAD_LOCALS', 82)         # 51
def_op('RETURN_VALUE', 83)        # 66
def_op('IMPORT_STAR', 84)         # 56
def_op('EXEC_STMT', 85)           # 65
def_op('YIELD_VALUE', 86)         # 26
def_op('POP_BLOCK', 87)           # 1
def_op('END_FINALLY', 88)         # 67
def_op('BUILD_CLASS', 89)         # 42
HAVE_ARGUMENT = 90                # 70 # Opcodes from here have an argument:
name_op('STORE_NAME', 90)         # 95 # Index in name list
name_op('DELETE_NAME', 91)        # 94 # ""
def_op('UNPACK_SEQUENCE', 92)     # 93 # Number of tuple items
jrel_op('FOR_ITER', 93)           # 81
name_op('STORE_ATTR', 95)         # 84 # Index in name list
name_op('DELETE_ATTR', 96)        # 87 # ""
name_op('STORE_GLOBAL', 97)       # 105 # ""
name_op('DELETE_GLOBAL', 98)      # 98 # ""
def_op('DUP_TOPX', 99)            # 104 # number of items to duplicate
def_op('LOAD_CONST', 100)         # 72 # Index in const list
hasconst.append(100)              # 72
name_op('LOAD_NAME', 101)         # 79 # Index in name list
def_op('BUILD_TUPLE', 102)        # 80 # Number of tuple items
def_op('BUILD_LIST', 103)         # 107 # Number of list items
def_op('BUILD_MAP', 104)          # 78 # Always zero for now
name_op('LOAD_ATTR', 105)         # 86 # Index in name list
def_op('COMPARE_OP', 106)         # 101 # Comparison operator
hascompare.append(106)            # 101
name_op('IMPORT_NAME', 107)       # 88 # Index in name list
name_op('IMPORT_FROM', 108)       # 89 # Index in name list
jrel_op('JUMP_FORWARD', 110)      # 73 # Number of bytes to skip
# NOTE(review): real CPython 2.5 JUMP_IF_FALSE/JUMP_IF_TRUE are *relative*
# jumps; they are registered as absolute here, presumably because 2.5
# bytecode is translated to a pseudo-2.7 form (see README) -- confirm.
jabs_op('JUMP_IF_FALSE', 111)     # 83 # ""
jabs_op('JUMP_IF_TRUE', 112)      # 90 # ""
jabs_op('JUMP_ABSOLUTE', 113)     # 103 # Target byte offset from beginning of code
name_op('LOAD_GLOBAL', 116)       # 70 # Index in name list
jabs_op('CONTINUE_LOOP', 119)     # 96 # Target address
jrel_op('SETUP_LOOP', 120)        # 74 # Distance to target address
jrel_op('SETUP_EXCEPT', 121)      # 75 # ""
jrel_op('SETUP_FINALLY', 122)     # 106 # ""
def_op('LOAD_FAST', 124)          # 92 # Local variable number
haslocal.append(124)              # 92
def_op('STORE_FAST', 125)         # 82 # Local variable number
haslocal.append(125)              # 82
def_op('DELETE_FAST', 126)        # 71 # Local variable number
haslocal.append(126)              # 71
def_op('RAISE_VARARGS', 130)      # 91 # Number of raise arguments (1, 2, or 3)
def_op('CALL_FUNCTION', 131)      # 102 # #args + (#kwargs << 8)
def_op('MAKE_FUNCTION', 132)      # 76 # Number of args with default values
def_op('BUILD_SLICE', 133)        # 77 # Number of items
def_op('MAKE_CLOSURE', 134)       # 85
def_op('LOAD_CLOSURE', 135)       # 97
hasfree.append(135)               # 97
def_op('LOAD_DEREF', 136)         # 99
hasfree.append(136)               # 99
def_op('STORE_DEREF', 137)        # 100
hasfree.append(137)               # 100
def_op('CALL_FUNCTION_VAR', 140)  # 111 # #args + (#kwargs << 8)
def_op('CALL_FUNCTION_KW', 141)   # 112 # #args + (#kwargs << 8)
def_op('CALL_FUNCTION_VAR_KW', 142) # 113 # #args + (#kwargs << 8)
def_op('EXTENDED_ARG', 143)       # 114
EXTENDED_ARG = 143                # 114
# The registration helpers are only needed at module build time.
del def_op, name_op, jrel_op, jabs_op

186
uncompyle2/opcode/opcode_26.py Executable file
View File

@@ -0,0 +1,186 @@
"""
opcode module - potentially shared between dis and other modules which
operate on bytecodes (e.g. peephole optimizers).
"""
__all__ = ["cmp_op", "hasconst", "hasname", "hasjrel", "hasjabs",
           "haslocal", "hascompare", "hasfree", "opname", "opmap",
           "HAVE_ARGUMENT", "EXTENDED_ARG"]

# Operand values for COMPARE_OP, indexed by the oparg.
cmp_op = ('<', '<=', '==', '!=', '>', '>=', 'in', 'not in', 'is',
          'is not', 'exception match', 'BAD')

# Category lists, populated by the registration helpers below.
hasconst, hasname, hasjrel, hasjabs = [], [], [], []
haslocal, hascompare, hasfree = [], [], []

opmap = {}           # mnemonic -> opcode number
opname = [''] * 256  # opcode number -> mnemonic ('<n>' while undefined)
for op in range(256):
    opname[op] = '<%r>' % (op,)
del op

def def_op(name, op):
    """Register *name* as the mnemonic for opcode number *op*."""
    opmap[name] = op
    opname[op] = name

def name_op(name, op):
    """Register an opcode whose argument is an index into the names list."""
    def_op(name, op)
    hasname.append(op)

def jrel_op(name, op):
    """Register an opcode whose argument is a relative jump offset."""
    def_op(name, op)
    hasjrel.append(op)

def jabs_op(name, op):
    """Register an opcode whose argument is an absolute jump target."""
    def_op(name, op)
    hasjabs.append(op)
# ---------------------------------------------------------------------------
# Opcode table for Python 2.6 bytecode (adds STORE_MAP vs. 2.5).
# ---------------------------------------------------------------------------
# Instruction opcodes for compiled code
# Blank lines correspond to available opcodes
def_op('STOP_CODE', 0)
def_op('POP_TOP', 1)
def_op('ROT_TWO', 2)
def_op('ROT_THREE', 3)
def_op('DUP_TOP', 4)
def_op('ROT_FOUR', 5)
def_op('NOP', 9)
def_op('UNARY_POSITIVE', 10)
def_op('UNARY_NEGATIVE', 11)
def_op('UNARY_NOT', 12)
def_op('UNARY_CONVERT', 13)
def_op('UNARY_INVERT', 15)
def_op('LIST_APPEND', 18)
def_op('BINARY_POWER', 19)
def_op('BINARY_MULTIPLY', 20)
def_op('BINARY_DIVIDE', 21)
def_op('BINARY_MODULO', 22)
def_op('BINARY_ADD', 23)
def_op('BINARY_SUBTRACT', 24)
def_op('BINARY_SUBSCR', 25)
def_op('BINARY_FLOOR_DIVIDE', 26)
def_op('BINARY_TRUE_DIVIDE', 27)
def_op('INPLACE_FLOOR_DIVIDE', 28)
def_op('INPLACE_TRUE_DIVIDE', 29)
def_op('SLICE+0', 30)
def_op('SLICE+1', 31)
def_op('SLICE+2', 32)
def_op('SLICE+3', 33)
def_op('STORE_SLICE+0', 40)
def_op('STORE_SLICE+1', 41)
def_op('STORE_SLICE+2', 42)
def_op('STORE_SLICE+3', 43)
def_op('DELETE_SLICE+0', 50)
def_op('DELETE_SLICE+1', 51)
def_op('DELETE_SLICE+2', 52)
def_op('DELETE_SLICE+3', 53)
def_op('STORE_MAP', 54)
def_op('INPLACE_ADD', 55)
def_op('INPLACE_SUBTRACT', 56)
def_op('INPLACE_MULTIPLY', 57)
def_op('INPLACE_DIVIDE', 58)
def_op('INPLACE_MODULO', 59)
def_op('STORE_SUBSCR', 60)
def_op('DELETE_SUBSCR', 61)
def_op('BINARY_LSHIFT', 62)
def_op('BINARY_RSHIFT', 63)
def_op('BINARY_AND', 64)
def_op('BINARY_XOR', 65)
def_op('BINARY_OR', 66)
def_op('INPLACE_POWER', 67)
def_op('GET_ITER', 68)
def_op('PRINT_EXPR', 70)
def_op('PRINT_ITEM', 71)
def_op('PRINT_NEWLINE', 72)
def_op('PRINT_ITEM_TO', 73)
def_op('PRINT_NEWLINE_TO', 74)
def_op('INPLACE_LSHIFT', 75)
def_op('INPLACE_RSHIFT', 76)
def_op('INPLACE_AND', 77)
def_op('INPLACE_XOR', 78)
def_op('INPLACE_OR', 79)
def_op('BREAK_LOOP', 80)
def_op('WITH_CLEANUP', 81)
def_op('LOAD_LOCALS', 82)
def_op('RETURN_VALUE', 83)
def_op('IMPORT_STAR', 84)
def_op('EXEC_STMT', 85)
def_op('YIELD_VALUE', 86)
def_op('POP_BLOCK', 87)
def_op('END_FINALLY', 88)
def_op('BUILD_CLASS', 89)
HAVE_ARGUMENT = 90              # Opcodes from here have an argument:
name_op('STORE_NAME', 90)       # Index in name list
name_op('DELETE_NAME', 91)      # ""
def_op('UNPACK_SEQUENCE', 92)   # Number of tuple items
jrel_op('FOR_ITER', 93)
name_op('STORE_ATTR', 95)       # Index in name list
name_op('DELETE_ATTR', 96)      # ""
name_op('STORE_GLOBAL', 97)     # ""
name_op('DELETE_GLOBAL', 98)    # ""
def_op('DUP_TOPX', 99)          # number of items to duplicate
def_op('LOAD_CONST', 100)       # Index in const list
hasconst.append(100)
name_op('LOAD_NAME', 101)       # Index in name list
def_op('BUILD_TUPLE', 102)      # Number of tuple items
def_op('BUILD_LIST', 103)       # Number of list items
def_op('BUILD_MAP', 104)        # Number of dict entries (upto 255)
name_op('LOAD_ATTR', 105)       # Index in name list
def_op('COMPARE_OP', 106)       # Comparison operator
hascompare.append(106)
name_op('IMPORT_NAME', 107)     # Index in name list
name_op('IMPORT_FROM', 108)     # Index in name list
jrel_op('JUMP_FORWARD', 110)    # Number of bytes to skip
# NOTE(review): real CPython 2.6 JUMP_IF_FALSE/JUMP_IF_TRUE are *relative*
# jumps; they are registered as absolute here, presumably because 2.6
# bytecode is translated to a pseudo-2.7 form (see README) -- confirm.
jabs_op('JUMP_IF_FALSE', 111)   # ""
jabs_op('JUMP_IF_TRUE', 112)    # ""
jabs_op('JUMP_ABSOLUTE', 113)   # Target byte offset from beginning of code
name_op('LOAD_GLOBAL', 116)     # Index in name list
jabs_op('CONTINUE_LOOP', 119)   # Target address
jrel_op('SETUP_LOOP', 120)      # Distance to target address
jrel_op('SETUP_EXCEPT', 121)    # ""
jrel_op('SETUP_FINALLY', 122)   # ""
def_op('LOAD_FAST', 124)        # Local variable number
haslocal.append(124)
def_op('STORE_FAST', 125)       # Local variable number
haslocal.append(125)
def_op('DELETE_FAST', 126)      # Local variable number
haslocal.append(126)
def_op('RAISE_VARARGS', 130)    # Number of raise arguments (1, 2, or 3)
def_op('CALL_FUNCTION', 131)    # #args + (#kwargs << 8)
def_op('MAKE_FUNCTION', 132)    # Number of args with default values
def_op('BUILD_SLICE', 133)      # Number of items
def_op('MAKE_CLOSURE', 134)
def_op('LOAD_CLOSURE', 135)
hasfree.append(135)
def_op('LOAD_DEREF', 136)
hasfree.append(136)
def_op('STORE_DEREF', 137)
hasfree.append(137)
def_op('CALL_FUNCTION_VAR', 140)     # #args + (#kwargs << 8)
def_op('CALL_FUNCTION_KW', 141)      # #args + (#kwargs << 8)
def_op('CALL_FUNCTION_VAR_KW', 142)  # #args + (#kwargs << 8)
def_op('EXTENDED_ARG', 143)
EXTENDED_ARG = 143
# The registration helpers are only needed at module build time.
del def_op, name_op, jrel_op, jabs_op

192
uncompyle2/opcode/opcode_27.py Executable file
View File

@@ -0,0 +1,192 @@
"""
opcode module - potentially shared between dis and other modules which
operate on bytecodes (e.g. peephole optimizers).
"""
__all__ = ["cmp_op", "hasconst", "hasname", "hasjrel", "hasjabs",
           "haslocal", "hascompare", "hasfree", "opname", "opmap",
           "HAVE_ARGUMENT", "EXTENDED_ARG"]

# Operand values for COMPARE_OP, indexed by the oparg.
cmp_op = ('<', '<=', '==', '!=', '>', '>=', 'in', 'not in', 'is',
          'is not', 'exception match', 'BAD')

# Category lists, populated by the registration helpers below.
hasconst, hasname, hasjrel, hasjabs = [], [], [], []
haslocal, hascompare, hasfree = [], [], []

opmap = {}           # mnemonic -> opcode number
opname = [''] * 256  # opcode number -> mnemonic ('<n>' while undefined)
for op in range(256):
    opname[op] = '<%r>' % (op,)
del op

def def_op(name, op):
    """Register *name* as the mnemonic for opcode number *op*."""
    opmap[name] = op
    opname[op] = name

def name_op(name, op):
    """Register an opcode whose argument is an index into the names list."""
    def_op(name, op)
    hasname.append(op)

def jrel_op(name, op):
    """Register an opcode whose argument is a relative jump offset."""
    def_op(name, op)
    hasjrel.append(op)

def jabs_op(name, op):
    """Register an opcode whose argument is an absolute jump target."""
    def_op(name, op)
    hasjabs.append(op)
# ---------------------------------------------------------------------------
# Opcode table for Python 2.7 bytecode (matches CPython 2.7 Lib/opcode.py:
# LIST_APPEND moved to 94, BUILD_SET added, POP_JUMP_IF_*, SETUP_WITH,
# SET_ADD/MAP_ADD, EXTENDED_ARG moved to 145).
# ---------------------------------------------------------------------------
# Instruction opcodes for compiled code
# Blank lines correspond to available opcodes
def_op('STOP_CODE', 0)
def_op('POP_TOP', 1)
def_op('ROT_TWO', 2)
def_op('ROT_THREE', 3)
def_op('DUP_TOP', 4)
def_op('ROT_FOUR', 5)
def_op('NOP', 9)
def_op('UNARY_POSITIVE', 10)
def_op('UNARY_NEGATIVE', 11)
def_op('UNARY_NOT', 12)
def_op('UNARY_CONVERT', 13)
def_op('UNARY_INVERT', 15)
def_op('BINARY_POWER', 19)
def_op('BINARY_MULTIPLY', 20)
def_op('BINARY_DIVIDE', 21)
def_op('BINARY_MODULO', 22)
def_op('BINARY_ADD', 23)
def_op('BINARY_SUBTRACT', 24)
def_op('BINARY_SUBSCR', 25)
def_op('BINARY_FLOOR_DIVIDE', 26)
def_op('BINARY_TRUE_DIVIDE', 27)
def_op('INPLACE_FLOOR_DIVIDE', 28)
def_op('INPLACE_TRUE_DIVIDE', 29)
def_op('SLICE+0', 30)
def_op('SLICE+1', 31)
def_op('SLICE+2', 32)
def_op('SLICE+3', 33)
def_op('STORE_SLICE+0', 40)
def_op('STORE_SLICE+1', 41)
def_op('STORE_SLICE+2', 42)
def_op('STORE_SLICE+3', 43)
def_op('DELETE_SLICE+0', 50)
def_op('DELETE_SLICE+1', 51)
def_op('DELETE_SLICE+2', 52)
def_op('DELETE_SLICE+3', 53)
def_op('STORE_MAP', 54)
def_op('INPLACE_ADD', 55)
def_op('INPLACE_SUBTRACT', 56)
def_op('INPLACE_MULTIPLY', 57)
def_op('INPLACE_DIVIDE', 58)
def_op('INPLACE_MODULO', 59)
def_op('STORE_SUBSCR', 60)
def_op('DELETE_SUBSCR', 61)
def_op('BINARY_LSHIFT', 62)
def_op('BINARY_RSHIFT', 63)
def_op('BINARY_AND', 64)
def_op('BINARY_XOR', 65)
def_op('BINARY_OR', 66)
def_op('INPLACE_POWER', 67)
def_op('GET_ITER', 68)
def_op('PRINT_EXPR', 70)
def_op('PRINT_ITEM', 71)
def_op('PRINT_NEWLINE', 72)
def_op('PRINT_ITEM_TO', 73)
def_op('PRINT_NEWLINE_TO', 74)
def_op('INPLACE_LSHIFT', 75)
def_op('INPLACE_RSHIFT', 76)
def_op('INPLACE_AND', 77)
def_op('INPLACE_XOR', 78)
def_op('INPLACE_OR', 79)
def_op('BREAK_LOOP', 80)
def_op('WITH_CLEANUP', 81)
def_op('LOAD_LOCALS', 82)
def_op('RETURN_VALUE', 83)
def_op('IMPORT_STAR', 84)
def_op('EXEC_STMT', 85)
def_op('YIELD_VALUE', 86)
def_op('POP_BLOCK', 87)
def_op('END_FINALLY', 88)
def_op('BUILD_CLASS', 89)
HAVE_ARGUMENT = 90              # Opcodes from here have an argument:
name_op('STORE_NAME', 90)       # Index in name list
name_op('DELETE_NAME', 91)      # ""
def_op('UNPACK_SEQUENCE', 92)   # Number of tuple items
jrel_op('FOR_ITER', 93)
def_op('LIST_APPEND', 94)
name_op('STORE_ATTR', 95)       # Index in name list
name_op('DELETE_ATTR', 96)      # ""
name_op('STORE_GLOBAL', 97)     # ""
name_op('DELETE_GLOBAL', 98)    # ""
def_op('DUP_TOPX', 99)          # number of items to duplicate
def_op('LOAD_CONST', 100)       # Index in const list
hasconst.append(100)
name_op('LOAD_NAME', 101)       # Index in name list
def_op('BUILD_TUPLE', 102)      # Number of tuple items
def_op('BUILD_LIST', 103)       # Number of list items
def_op('BUILD_SET', 104)        # Number of set items
def_op('BUILD_MAP', 105)        # Number of dict entries (upto 255)
name_op('LOAD_ATTR', 106)       # Index in name list
def_op('COMPARE_OP', 107)       # Comparison operator
hascompare.append(107)
name_op('IMPORT_NAME', 108)     # Index in name list
name_op('IMPORT_FROM', 109)     # Index in name list
jrel_op('JUMP_FORWARD', 110)    # Number of bytes to skip
jabs_op('JUMP_IF_FALSE_OR_POP', 111) # Target byte offset from beginning of code
jabs_op('JUMP_IF_TRUE_OR_POP', 112)  # ""
jabs_op('JUMP_ABSOLUTE', 113)        # ""
jabs_op('POP_JUMP_IF_FALSE', 114)    # ""
jabs_op('POP_JUMP_IF_TRUE', 115)     # ""
name_op('LOAD_GLOBAL', 116)     # Index in name list
jabs_op('CONTINUE_LOOP', 119)   # Target address
jrel_op('SETUP_LOOP', 120)      # Distance to target address
jrel_op('SETUP_EXCEPT', 121)    # ""
jrel_op('SETUP_FINALLY', 122)   # ""
def_op('LOAD_FAST', 124)        # Local variable number
haslocal.append(124)
def_op('STORE_FAST', 125)       # Local variable number
haslocal.append(125)
def_op('DELETE_FAST', 126)      # Local variable number
haslocal.append(126)
def_op('RAISE_VARARGS', 130)    # Number of raise arguments (1, 2, or 3)
def_op('CALL_FUNCTION', 131)    # #args + (#kwargs << 8)
def_op('MAKE_FUNCTION', 132)    # Number of args with default values
def_op('BUILD_SLICE', 133)      # Number of items
def_op('MAKE_CLOSURE', 134)
def_op('LOAD_CLOSURE', 135)
hasfree.append(135)
def_op('LOAD_DEREF', 136)
hasfree.append(136)
def_op('STORE_DEREF', 137)
hasfree.append(137)
def_op('CALL_FUNCTION_VAR', 140)     # #args + (#kwargs << 8)
def_op('CALL_FUNCTION_KW', 141)      # #args + (#kwargs << 8)
def_op('CALL_FUNCTION_VAR_KW', 142)  # #args + (#kwargs << 8)
jrel_op('SETUP_WITH', 143)
def_op('EXTENDED_ARG', 145)
EXTENDED_ARG = 145
def_op('SET_ADD', 146)
def_op('MAP_ADD', 147)
# The registration helpers are only needed at module build time.
del def_op, name_op, jrel_op, jabs_op

700
uncompyle2/spark.py Executable file
View File

@@ -0,0 +1,700 @@
# Copyright (c) 1998-2002 John Aycock
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
__version__ = 'SPARK-0.7 (pre-alpha-7) uncompyle trim'
def _namelist(instance):
namelist, namedict, classlist = [], {}, [instance.__class__]
for c in classlist:
for b in c.__bases__:
classlist.append(b)
for name in c.__dict__.keys():
if not namedict.has_key(name):
namelist.append(name)
namedict[name] = 1
return namelist
#
# Extracted from GenericParser and made global so that [un]picking works.
#
class _State:
def __init__(self, stateno, items):
self.T, self.complete, self.items = [], [], items
self.stateno = stateno
class GenericParser:
#
# An Earley parser, as per J. Earley, "An Efficient Context-Free
# Parsing Algorithm", CACM 13(2), pp. 94-102. Also J. C. Earley,
# "An Efficient Context-Free Parsing Algorithm", Ph.D. thesis,
# Carnegie-Mellon University, August 1968. New formulation of
# the parser according to J. Aycock, "Practical Earley Parsing
# and the SPARK Toolkit", Ph.D. thesis, University of Victoria,
# 2001, and J. Aycock and R. N. Horspool, "Practical Earley
# Parsing", unpublished paper, 2001.
#
def __init__(self, start):
self.rules = {}
self.rule2func = {}
self.rule2name = {}
self.collectRules()
self.augment(start)
self.ruleschanged = 1
_NULLABLE = '\e_'
_START = 'START'
_BOF = '|-'
#
# When pickling, take the time to generate the full state machine;
# some information is then extraneous, too. Unfortunately we
# can't save the rule2func map.
#
def __getstate__(self):
if self.ruleschanged:
#
# XXX - duplicated from parse()
#
self.computeNull()
self.newrules = {}
self.new2old = {}
self.makeNewRules()
self.ruleschanged = 0
self.edges, self.cores = {}, {}
self.states = { 0: self.makeState0() }
self.makeState(0, self._BOF)
#
# XXX - should find a better way to do this..
#
changes = 1
while changes:
changes = 0
for k, v in self.edges.items():
if v is None:
state, sym = k
if self.states.has_key(state):
self.goto(state, sym)
changes = 1
rv = self.__dict__.copy()
for s in self.states.values():
del s.items
del rv['rule2func']
del rv['nullable']
del rv['cores']
return rv
def __setstate__(self, D):
self.rules = {}
self.rule2func = {}
self.rule2name = {}
self.collectRules()
start = D['rules'][self._START][0][1][1] # Blech.
self.augment(start)
D['rule2func'] = self.rule2func
D['makeSet'] = self.makeSet_fast
self.__dict__ = D
#
# A hook for GenericASTBuilder and GenericASTMatcher. Mess
# thee not with this; nor shall thee toucheth the _preprocess
# argument to addRule.
#
def preprocess(self, rule, func): return rule, func
def addRule(self, doc, func, _preprocess=1):
fn = func
rules = doc.split()
index = []
for i in xrange(len(rules)):
if rules[i] == '::=':
index.append(i-1)
index.append(len(rules))
for i in xrange(len(index)-1):
lhs = rules[index[i]]
rhs = rules[index[i]+2:index[i+1]]
rule = (lhs, tuple(rhs))
if _preprocess:
rule, fn = self.preprocess(rule, func)
if self.rules.has_key(lhs):
self.rules[lhs].append(rule)
else:
self.rules[lhs] = [ rule ]
self.rule2func[rule] = fn
self.rule2name[rule] = func.__name__[2:]
self.ruleschanged = 1
def collectRules(self):
for name in _namelist(self):
if name[:2] == 'p_':
func = getattr(self, name)
doc = func.__doc__
self.addRule(doc, func)
def augment(self, start):
rule = '%s ::= %s %s' % (self._START, self._BOF, start)
self.addRule(rule, lambda args: args[1], 0)
def computeNull(self):
self.nullable = {}
tbd = []
for rulelist in self.rules.values():
lhs = rulelist[0][0]
self.nullable[lhs] = 0
for rule in rulelist:
rhs = rule[1]
if len(rhs) == 0:
self.nullable[lhs] = 1
continue
#
# We only need to consider rules which
# consist entirely of nonterminal symbols.
# This should be a savings on typical
# grammars.
#
for sym in rhs:
if not self.rules.has_key(sym):
break
else:
tbd.append(rule)
changes = 1
while changes:
changes = 0
for lhs, rhs in tbd:
if self.nullable[lhs]:
continue
for sym in rhs:
if not self.nullable[sym]:
break
else:
self.nullable[lhs] = 1
changes = 1
def makeState0(self):
s0 = _State(0, [])
for rule in self.newrules[self._START]:
s0.items.append((rule, 0))
return s0
def finalState(self, tokens):
#
# Yuck.
#
if len(self.newrules[self._START]) == 2 and len(tokens) == 0:
return 1
start = self.rules[self._START][0][1][1]
return self.goto(1, start)
def makeNewRules(self):
worklist = []
for rulelist in self.rules.values():
for rule in rulelist:
worklist.append((rule, 0, 1, rule))
for rule, i, candidate, oldrule in worklist:
lhs, rhs = rule
n = len(rhs)
while i < n:
sym = rhs[i]
if not self.rules.has_key(sym) or \
not self.nullable[sym]:
candidate = 0
i = i + 1
continue
newrhs = list(rhs)
newrhs[i] = self._NULLABLE+sym
newrule = (lhs, tuple(newrhs))
worklist.append((newrule, i+1,
candidate, oldrule))
candidate = 0
i = i + 1
else:
if candidate:
lhs = self._NULLABLE+lhs
rule = (lhs, rhs)
if self.newrules.has_key(lhs):
self.newrules[lhs].append(rule)
else:
self.newrules[lhs] = [ rule ]
self.new2old[rule] = oldrule
def typestring(self, token):
return None
def error(self, token):
print "Syntax error at or near `%s' token" % token
raise SystemExit
def parse(self, tokens):
sets = [ [(1,0), (2,0)] ]
self.links = {}
if self.ruleschanged:
self.computeNull()
self.newrules = {}
self.new2old = {}
self.makeNewRules()
self.ruleschanged = 0
self.edges, self.cores = {}, {}
self.states = { 0: self.makeState0() }
self.makeState(0, self._BOF)
for i in xrange(len(tokens)):
sets.append([])
if sets[i] == []:
break
self.makeSet(tokens[i], sets, i)
else:
sets.append([])
self.makeSet(None, sets, len(tokens))
finalitem = (self.finalState(tokens), 0)
if finalitem not in sets[-2]:
if len(tokens) > 0:
self.error(tokens[i-1])
else:
self.error(None)
return self.buildTree(self._START, finalitem,
tokens, len(sets)-2)
def isnullable(self, sym):
#
# For symbols in G_e only. If we weren't supporting 1.5,
# could just use sym.startswith().
#
return self._NULLABLE == sym[0:len(self._NULLABLE)]
def skip(self, (lhs, rhs), pos=0):
n = len(rhs)
while pos < n:
if not self.isnullable(rhs[pos]):
break
pos = pos + 1
return pos
    def makeState(self, state, sym):
        """Generate (or find) the successor of *state* on *sym*.

        Builds the epsilon-kernel state K and its epsilon-nonkernel
        companion NK in one pass, deduplicating states through
        self.cores.  Returns K's state number.
        """
        assert sym is not None
        #
        # Compute \epsilon-kernel state's core and see if
        # it exists already.
        #
        kitems = []
        for rule, pos in self.states[state].items:
            lhs, rhs = rule
            if rhs[pos:pos+1] == (sym,):
                kitems.append((rule, self.skip(rule, pos+1)))
        tcore = tuple(sorted(kitems))
        if self.cores.has_key(tcore):
            return self.cores[tcore]
        #
        # Nope, doesn't exist. Compute it and the associated
        # \epsilon-nonkernel state together; we'll need it right away.
        #
        k = self.cores[tcore] = len(self.states)
        K, NK = _State(k, kitems), _State(k+1, [])
        self.states[k] = K
        predicted = {}
        edges = self.edges
        rules = self.newrules
        for X in K, NK:
            worklist = X.items
            for item in worklist:
                rule, pos = item
                lhs, rhs = rule
                if pos == len(rhs):
                    # Dot at the end: a completed rule of this state.
                    X.complete.append(rule)
                    continue
                nextSym = rhs[pos]
                key = (X.stateno, nextSym)
                if not rules.has_key(nextSym):
                    # Terminal: note a pending edge and a shiftable symbol.
                    if not edges.has_key(key):
                        edges[key] = None
                        X.T.append(nextSym)
                else:
                    # Nonterminal: predict its rules into NK exactly once.
                    edges[key] = None
                    if not predicted.has_key(nextSym):
                        predicted[nextSym] = 1
                        for prule in rules[nextSym]:
                            ppos = self.skip(prule)
                            new = (prule, ppos)
                            NK.items.append(new)
            #
            # Problem: we know K needs generating, but we
            # don't yet know about NK. Can't commit anything
            # regarding NK to self.edges until we're sure. Should
            # we delay committing on both K and NK to avoid this
            # hacky code? This creates other problems..
            #
            if X is K:
                edges = {}
        if NK.items == []:
            return k
        #
        # Check for \epsilon-nonkernel's core. Unfortunately we
        # need to know the entire set of predicted nonterminals
        # to do this without accidentally duplicating states.
        #
        tcore = tuple(sorted(predicted.keys()))
        if self.cores.has_key(tcore):
            self.edges[(k, None)] = self.cores[tcore]
            return k
        nk = self.cores[tcore] = self.edges[(k, None)] = NK.stateno
        self.edges.update(edges)
        self.states[nk] = NK
        return k
def goto(self, state, sym):
key = (state, sym)
if not self.edges.has_key(key):
#
# No transitions from state on sym.
#
return None
rv = self.edges[key]
if rv is None:
#
# Target state isn't generated yet. Remedy this.
#
rv = self.makeState(state, sym)
self.edges[key] = rv
return rv
def gotoT(self, state, t):
return [self.goto(state, t)]
def gotoST(self, state, st):
rv = []
for t in self.states[state].T:
if st == t:
rv.append(self.goto(state, t))
return rv
def add(self, set, item, i=None, predecessor=None, causal=None):
if predecessor is None:
if item not in set:
set.append(item)
else:
key = (item, i)
if item not in set:
self.links[key] = []
set.append(item)
self.links[key].append((predecessor, causal))
    def makeSet(self, token, sets, i):
        """One Earley step at position *i*: scan *token* (None at EOF)
        into sets[i+1] and run completions back into sets[i]."""
        cur, next = sets[i], sets[i+1]
        # Prefer a type-string transition when the scanner provides one;
        # otherwise compare the raw token against shiftable symbols.
        ttype = token is not None and self.typestring(token) or None
        if ttype is not None:
            fn, arg = self.gotoT, ttype
        else:
            fn, arg = self.gotoST, token
        for item in cur:
            ptr = (item, i)
            state, parent = item
            add = fn(state, arg)
            for k in add:
                if k is not None:
                    self.add(next, (k, parent), i+1, ptr)
                    # Also follow the epsilon (None) edge, if any.
                    nk = self.goto(k, None)
                    if nk is not None:
                        self.add(next, (nk, i+1))
            if parent == i:
                # Item started at this position: nothing to complete.
                continue
            for rule in self.states[state].complete:
                lhs, rhs = rule
                for pitem in sets[parent]:
                    pstate, pparent = pitem
                    k = self.goto(pstate, lhs)
                    if k is not None:
                        why = (item, i, rule)
                        pptr = (pitem, parent)
                        self.add(cur, (k, pparent),
                                 i, pptr, why)
                        nk = self.goto(k, None)
                        if nk is not None:
                            self.add(cur, (nk, i))
    def makeSet_fast(self, token, sets, i):
        """Hand-inlined variant of makeSet(): identical semantics,
        faster by reading self.edges directly and skipping self.add()."""
        #
        # Call *only* when the entire state machine has been built!
        # It relies on self.edges being filled in completely, and
        # then duplicates and inlines code to boost speed at the
        # cost of extreme ugliness.
        #
        cur, next = sets[i], sets[i+1]
        ttype = token is not None and self.typestring(token) or None
        for item in cur:
            ptr = (item, i)
            state, parent = item
            if ttype is not None:
                k = self.edges.get((state, ttype), None)
                if k is not None:
                    #self.add(next, (k, parent), i+1, ptr)
                    #INLINED --v
                    new = (k, parent)
                    key = (new, i+1)
                    if new not in next:
                        self.links[key] = []
                        next.append(new)
                    self.links[key].append((ptr, None))
                    #INLINED --^
                    #nk = self.goto(k, None)
                    nk = self.edges.get((k, None), None)
                    if nk is not None:
                        #self.add(next, (nk, i+1))
                        #INLINED --v
                        new = (nk, i+1)
                        if new not in next:
                            next.append(new)
                        #INLINED --^
            else:
                # Raw-token path: gotoST/add are cheap enough to keep.
                add = self.gotoST(state, token)
                for k in add:
                    if k is not None:
                        self.add(next, (k, parent), i+1, ptr)
                        #nk = self.goto(k, None)
                        nk = self.edges.get((k, None), None)
                        if nk is not None:
                            self.add(next, (nk, i+1))
            if parent == i:
                continue
            for rule in self.states[state].complete:
                lhs, rhs = rule
                for pitem in sets[parent]:
                    pstate, pparent = pitem
                    #k = self.goto(pstate, lhs)
                    k = self.edges.get((pstate, lhs), None)
                    if k is not None:
                        why = (item, i, rule)
                        pptr = (pitem, parent)
                        #self.add(cur, (k, pparent),
                        #         i, pptr, why)
                        #INLINED --v
                        new = (k, pparent)
                        key = (new, i)
                        if new not in cur:
                            self.links[key] = []
                            cur.append(new)
                        self.links[key].append((pptr, why))
                        #INLINED --^
                        #nk = self.goto(k, None)
                        nk = self.edges.get((k, None), None)
                        if nk is not None:
                            #self.add(cur, (nk, i))
                            #INLINED --v
                            new = (nk, i)
                            if new not in cur:
                                cur.append(new)
                            #INLINED --^
def predecessor(self, key, causal):
for p, c in self.links[key]:
if c == causal:
return p
assert 0
def causal(self, key):
links = self.links[key]
if len(links) == 1:
return links[0][1]
choices = []
rule2cause = {}
for p, c in links:
rule = c[2]
choices.append(rule)
rule2cause[rule] = c
return rule2cause[self.ambiguity(choices)]
    def deriveEpsilon(self, nt):
        """Build the empty derivation for nullable nonterminal *nt*,
        firing the user rule action for every epsilon rule involved."""
        if len(self.newrules[nt]) > 1:
            rule = self.ambiguity(self.newrules[nt])
        else:
            rule = self.newrules[nt][0]
        #print rule
        rhs = rule[1]
        attr = [None] * len(rhs)
        # Right-to-left, mirroring buildTree()'s attribute order.
        for i in xrange(len(rhs)-1, -1, -1):
            attr[i] = self.deriveEpsilon(rhs[i])
        return self.rule2func[self.new2old[rule]](attr)
    def buildTree(self, nt, item, tokens, k):
        """Walk a completed Earley item backwards via the recorded
        links, building the tree for *nt* and firing rule actions."""
        state, parent = item
        choices = []
        for rule in self.states[state].complete:
            if rule[0] == nt:
                choices.append(rule)
        rule = choices[0]
        if len(choices) > 1:
            rule = self.ambiguity(choices)
        #print rule
        rhs = rule[1]
        attr = [None] * len(rhs)
        # Fill attributes right-to-left, following predecessor links.
        for i in xrange(len(rhs)-1, -1, -1):
            sym = rhs[i]
            if not self.newrules.has_key(sym):
                # Terminal: consume a token (the synthetic BOF has none).
                if sym != self._BOF:
                    attr[i] = tokens[k-1]
                    key = (item, k)
                    item, k = self.predecessor(key, None)
            #elif self.isnullable(sym):
            elif self._NULLABLE == sym[0:len(self._NULLABLE)]:
                # Tagged nullable: derive the empty expansion.
                attr[i] = self.deriveEpsilon(sym)
            else:
                # Nonterminal: recurse through the causal link.
                key = (item, k)
                why = self.causal(key)
                attr[i] = self.buildTree(sym, why[0],
                                         tokens, why[1])
                item, k = self.predecessor(key, why)
        return self.rule2func[self.new2old[rule]](attr)
def ambiguity(self, rules):
#
# XXX - problem here and in collectRules() if the same rule
# appears in >1 method. Also undefined results if rules
# causing the ambiguity appear in the same method.
#
sortlist = []
name2index = {}
for i in xrange(len(rules)):
lhs, rhs = rule = rules[i]
name = self.rule2name[self.new2old[rule]]
sortlist.append((len(rhs), name))
name2index[name] = i
sortlist.sort()
list = map(lambda (a,b): b, sortlist)
return rules[name2index[self.resolve(list)]]
def resolve(self, list):
#
# Resolve ambiguity in favor of the shortest RHS.
# Since we walk the tree from the top down, this
# should effectively resolve in favor of a "shift".
#
return list[0]
#
# GenericASTBuilder automagically constructs a concrete/abstract syntax tree
# for a given input. The extra argument is a class (not an instance!)
# which supports the "__setslice__" and "__len__" methods.
#
# XXX - silently overrides any user code in methods.
#
class GenericASTBuilder(GenericParser):
    """GenericParser subclass that builds a syntax tree instead of
    running user rule methods.  *AST* is a class (not an instance!)
    supporting the "__setslice__" and "__len__" methods."""
    def __init__(self, AST, start):
        GenericParser.__init__(self, start)
        self.AST = AST
    def preprocess(self, rule, func):
        # Silently override any user code in methods: rebind every rule
        # to a closure that builds an AST node for that rule's lhs.
        rebind = lambda lhs, self=self: \
                     lambda args, lhs=lhs, self=self: \
                         self.buildASTNode(args, lhs)
        lhs, rhs = rule
        return rule, rebind(lhs)
    def buildASTNode(self, args, lhs):
        # The original branched on isinstance(arg, self.AST), but both
        # branches appended the argument unchanged — a plain copy of the
        # argument list is exactly equivalent.
        return self.nonterminal(lhs, list(args))
    def nonterminal(self, type, args):
        # Build an AST node of the given type and slice the children in.
        rv = self.AST(type)
        rv[:len(args)] = args
        return rv
#
# GenericASTTraversal is a Visitor pattern according to Design Patterns. For
# each node it attempts to invoke the method n_<node type>, falling
# back onto the default() method if the n_* can't be found. The preorder
# traversal also looks for an exit hook named n_<node type>_exit (no default
# routine is called if it's not found). To prematurely halt traversal
# of a subtree, call the prune() method -- this only makes sense for a
# preorder traversal. Node type is determined via the typestring() method.
#
class GenericASTTraversalPruningException:
    """Raised by GenericASTTraversal.prune() to stop descending into
    the current subtree during a preorder walk."""
class GenericASTTraversal:
    # Visitor pattern (per Design Patterns): for each node, preorder()
    # invokes n_<node type>, falling back to default(); an exit hook
    # n_<node type>_exit runs after the children when defined.  Call
    # prune() inside a visitor to skip the current subtree.
    def __init__(self, ast):
        # ast: the root node.  Nodes must be iterable over their
        # children and expose a 'type' attribute (see typestring()).
        self.ast = ast
    def typestring(self, node):
        # Node type string used to pick the n_<type> visitor method.
        return node.type
    def prune(self):
        # Abort traversal of the current subtree (preorder only).
        raise GenericASTTraversalPruningException
    def preorder(self, node=None):
        # Preorder walk rooted at node (default: the whole AST).
        if node is None:
            node = self.ast
        try:
            name = 'n_' + self.typestring(node)
            if hasattr(self, name):
                func = getattr(self, name)
                func(node)
            else:
                self.default(node)
        except GenericASTTraversalPruningException:
            # prune() was raised: skip children and the exit hook.
            return
        for kid in node:
            self.preorder(kid)
        # Exit hook, if the subclass defines one; no default fallback.
        name = name + '_exit'
        if hasattr(self, name):
            func = getattr(self, name)
            func(node)
    def default(self, node):
        # Fallback visitor: do nothing.
        pass

335
uncompyle2/verify.py Executable file
View File

@@ -0,0 +1,335 @@
#
# (C) Copyright 2000-2002 by hartmut Goebel <hartmut@goebel.noris.de>
#
# byte-code verifier for uncompyle
#
import types
import operator
import dis
import uncompyle2, Scanner
# Map binary-opcode names to operator functions, used by
# cmp_code_objects() to verify constant folding done by the compiler.
BIN_OP_FUNCS = {
    'BINARY_POWER': operator.pow,
    'BINARY_MULTIPLY': operator.mul,
    'BINARY_DIVIDE': operator.div,
    'BINARY_FLOOR_DIVIDE': operator.floordiv,
    'BINARY_TRUE_DIVIDE': operator.truediv,
    'BINARY_MODULO' : operator.mod,
    'BINARY_ADD': operator.add,
    # Fixed typo: the real opcode name is BINARY_SUBTRACT (see the dis
    # module); the old key 'BINARY_SUBRACT' could never match a token
    # type, so folded subtractions always raised a spurious CmpErrorCode.
    'BINARY_SUBTRACT': operator.sub,
    'BINARY_LSHIFT': operator.lshift,
    'BINARY_RSHIFT': operator.rshift,
    'BINARY_AND': operator.and_,
    'BINARY_XOR': operator.xor,
    'BINARY_OR': operator.or_,
    }

# Filled in by cmp_code_objects() from the active scanner: the opcode
# names that carry a jump target.
JUMP_OPs = None
#--- exceptions ---

class VerifyCmpError(Exception):
    """Base class for all byte-code comparison failures."""
class CmpErrorConsts(VerifyCmpError):
    """Exception to be raised when consts differ."""
    def __init__(self, name, index):
        self.name = name
        self.index = index
    def __str__(self):
        return 'Compare Error within Consts of %s at index %i' % (
            repr(self.name), self.index)
class CmpErrorConstsType(VerifyCmpError):
    """Exception to be raised when consts differ."""
    def __init__(self, name, index):
        self.name = name
        self.index = index
    def __str__(self):
        return 'Consts type differ in %s at index %i' % (
            repr(self.name), self.index)
class CmpErrorConstsLen(VerifyCmpError):
    """Exception to be raised when length of co_consts differs."""
    def __init__(self, name, consts1, consts2):
        self.name = name
        self.consts = (consts1, consts2)
    def __str__(self):
        # repr() replaces the backtick-repr syntax, which is deprecated
        # in Python 2 and removed in Python 3; the rendered message is
        # byte-for-byte unchanged.
        return 'Consts length differs in %s:\n\n%i:\t%s\n\n%i:\t%s\n\n' % \
               (repr(self.name),
                len(self.consts[0]), repr(self.consts[0]),
                len(self.consts[1]), repr(self.consts[1]))
class CmpErrorCode(VerifyCmpError):
    """Exception to be raised when code differs."""
    def __init__(self, name, index, token1, token2, tokens1, tokens2):
        # name: dotted code-object name; index: offset of the first
        # mismatch; token1/token2: the differing tokens; tokens1/tokens2:
        # the full token streams, kept for the side-by-side dump.
        self.name = name
        self.index = index
        self.token1 = token1
        self.token2 = token2
        self.tokens = [tokens1, tokens2]
    def __str__(self):
        # Side-by-side dump of both token streams.  NB: Python 2's
        # map() with two sequences pads the shorter with None, so
        # streams of different length still line up — don't "simplify"
        # this to zip().
        s = reduce(lambda s,t: "%s%-37s\t%-37s\n" % (s, t[0], t[1]),
                   map(lambda a,b: (a,b),
                       self.tokens[0],
                       self.tokens[1]),
                   'Code differs in %s\n' % str(self.name))
        return ('Code differs in %s at offset %s [%s] != [%s]\n\n' % \
               (repr(self.name), self.index,
                repr(self.token1), repr(self.token2))) + s
class CmpErrorCodeLen(VerifyCmpError):
    """Exception to be raised when code length differs."""
    def __init__(self, name, tokens1, tokens2):
        self.name = name
        self.tokens = [tokens1, tokens2]
    def __str__(self):
        # Same side-by-side dump as CmpErrorCode.__str__; relies on
        # Python 2 map()'s None-padding for unequal-length streams.
        return reduce(lambda s,t: "%s%-37s\t%-37s\n" % (s, t[0], t[1]),
                      map(lambda a,b: (a,b),
                          self.tokens[0],
                          self.tokens[1]),
                      'Code len differs in %s\n' % str(self.name))
class CmpErrorMember(VerifyCmpError):
    """Exception to be raised when other members differ."""
    def __init__(self, name, member, data1, data2):
        self.name = name
        self.member = member
        self.data = (data1, data2)
    def __str__(self):
        left, right = self.data
        return 'Member %s differs in %s:\n\t%s\n\t%s\n' % (
            repr(self.member), repr(self.name), repr(left), repr(right))
#--- compare ---

# Code-object members skipped by cmp_code_objects(): these legitimately
# differ between an original .pyc and a recompiled decompilation.
__IGNORE_CODE_MEMBERS__ = ['co_filename', 'co_firstlineno', 'co_lnotab', 'co_stacksize', 'co_names']
def cmp_code_objects(version, code_obj1, code_obj2, name=''):
    """
    Compare two code-objects.

    This is the main part of this module.  Recursively compares all
    'co_*' members, with relaxed rules for co_code (token streams are
    compared modulo the decompiler's known equivalent transformations)
    and co_consts (only nested code objects are compared).  Raises a
    VerifyCmpError subclass on the first mismatch; returns None on
    success.
    """
    #print code_obj1, type(code_obj2)
    assert type(code_obj1) == types.CodeType
    assert type(code_obj2) == types.CodeType
    #print dir(code_obj1)
    if isinstance(code_obj1, object):
        # new style classes (Python 2.2)
        # assume _both_ code objects to be new stle classes
        assert dir(code_obj1) == dir(code_obj2)
    else:
        # old style classes
        assert dir(code_obj1) == code_obj1.__members__
        assert dir(code_obj2) == code_obj2.__members__
        assert code_obj1.__members__ == code_obj2.__members__
    # Build the dotted name used in error messages.
    if name == '__main__':
        name = code_obj1.co_name
    else:
        name = '%s.%s' % (name, code_obj1.co_name)
        if name == '.?': name = '__main__'
    if isinstance(code_obj1, object) and cmp(code_obj1, code_obj2):
        # use the new style code-classes' __cmp__ method, which
        # should be faster and more sophisticated
        # if this compare fails, we use the old routine to
        # find out, what exactly is nor equal
        # if this compare succeds, simply return
        #return
        pass
    if isinstance(code_obj1, object):
        members = filter(lambda x: x.startswith('co_'), dir(code_obj1))
    else:
        members = dir(code_obj1);
    members.sort(); #members.reverse()
    tokens1 = None
    for member in members:
        if member in __IGNORE_CODE_MEMBERS__:
            pass
        elif member == 'co_code':
            scanner = Scanner.getscanner(version)
            scanner.setShowAsm( showasm=0 )
            global JUMP_OPs
            JUMP_OPs = scanner.JUMP_OPs + ['JUMP_BACK']
            # use changed Token class
            # we (re)set this here to save exception handling,
            # which would get 'unubersichtlich'
            scanner.setTokenClass(Token)
            try:
                # disassemble both code-objects
                tokens1,customize = scanner.disassemble(code_obj1)
                del customize # save memory
                tokens2,customize = scanner.disassemble(code_obj2)
                del customize # save memory
            finally:
                scanner.resetTokenClass() # restore Token class
            targets1 = dis.findlabels(code_obj1.co_code)
            tokens1 = [t for t in tokens1 if t.type != 'COME_FROM']
            tokens2 = [t for t in tokens2 if t.type != 'COME_FROM']
            i1 = 0; i2 = 0
            # offset_map: offsets in stream 1 -> offsets in stream 2;
            # check_jumps: forward-jump targets still to be validated.
            offset_map = {}; check_jumps = {}
            while i1 < len(tokens1):
                if i2 >= len(tokens2):
                    # Stream 1 may legitimately end with one extra
                    # 'LOAD_CONST None; RETURN_VALUE' epilogue.
                    if len(tokens1) == len(tokens2) + 2 \
                          and tokens1[-1].type == 'RETURN_VALUE' \
                          and tokens1[-2].type == 'LOAD_CONST' \
                          and tokens1[-2].pattr == None \
                          and tokens1[-3].type == 'RETURN_VALUE':
                        break
                    else:
                        raise CmpErrorCodeLen(name, tokens1, tokens2)
                offset_map[tokens1[i1].offset] = tokens2[i2].offset
                # Validate any forward jumps that targeted this offset.
                for idx1, idx2, offset2 in check_jumps.get(tokens1[i1].offset, []):
                    if offset2 != tokens2[i2].offset:
                        raise CmpErrorCode(name, tokens1[idx1].offset, tokens1[idx1],
                                  tokens2[idx2], tokens1, tokens2)
                if tokens1[i1] != tokens2[i2]:
                    if tokens1[i1].type == 'LOAD_CONST' == tokens2[i2].type:
                        # A run of LOAD_CONSTs may have been folded by the
                        # compiler into a const tuple/list or a computed const.
                        i = 1
                        while tokens1[i1+i].type == 'LOAD_CONST':
                            i += 1
                        if tokens1[i1+i].type.startswith(('BUILD_TUPLE', 'BUILD_LIST')) \
                              and i == int(tokens1[i1+i].type.split('_')[-1]):
                            t = tuple([ elem.pattr for elem in tokens1[i1:i1+i] ])
                            if t != tokens2[i2].pattr:
                                raise CmpErrorCode(name, tokens1[i1].offset, tokens1[i1],
                                          tokens2[i2], tokens1, tokens2)
                            i1 += i + 1
                            i2 += 1
                            continue
                        elif i == 2 and tokens1[i1+i].type == 'ROT_TWO' and tokens2[i2+1].type == 'UNPACK_SEQUENCE_2':
                            i1 += 3
                            i2 += 2
                            continue
                        elif i == 2 and tokens1[i1+i].type in BIN_OP_FUNCS:
                            # Constant folding: recompute the folded value.
                            f = BIN_OP_FUNCS[tokens1[i1+i].type]
                            if f(tokens1[i1].pattr, tokens1[i1+1].pattr) == tokens2[i2].pattr:
                                i1 += 3
                                i2 += 1
                                continue
                    elif tokens1[i1].type == 'UNARY_NOT':
                        # 'not' folded into the following conditional jump.
                        if tokens2[i2].type == 'POP_JUMP_IF_TRUE':
                            if tokens1[i1+1].type == 'POP_JUMP_IF_FALSE':
                                i1 += 2
                                i2 += 1
                                continue
                        elif tokens2[i2].type == 'POP_JUMP_IF_FALSE':
                            if tokens1[i1+1].type == 'POP_JUMP_IF_TRUE':
                                i1 += 2
                                i2 += 1
                                continue
                    elif tokens1[i1].type in ('JUMP_FORWARD', 'JUMP_BACK') \
                          and tokens1[i1-1].type == 'RETURN_VALUE' \
                          and tokens2[i2-1].type in ('RETURN_VALUE', 'RETURN_END_IF') \
                          and int(tokens1[i1].offset) not in targets1:
                        # Unreachable jump after a return: skip it.
                        i1 += 1
                        continue
                    elif tokens1[i1].type == 'JUMP_FORWARD' and tokens2[i2].type == 'JUMP_BACK' \
                          and tokens1[i1+1].type == 'JUMP_BACK' and tokens2[i2+1].type == 'JUMP_BACK' \
                          and int(tokens1[i1].pattr) == int(tokens1[i1].offset) + 3:
                        if int(tokens1[i1].pattr) == int(tokens1[i1+1].offset):
                            i1 += 2
                            i2 += 2
                            continue
                    raise CmpErrorCode(name, tokens1[i1].offset, tokens1[i1],
                              tokens2[i2], tokens1, tokens2)
                elif tokens1[i1].type in JUMP_OPs and tokens1[i1].pattr != tokens2[i2].pattr:
                    # Same opcode, different target: back-jumps can be
                    # checked now via offset_map; forward jumps are queued.
                    dest1 = int(tokens1[i1].pattr)
                    dest2 = int(tokens2[i2].pattr)
                    if tokens1[i1].type == 'JUMP_BACK':
                        if offset_map[dest1] != dest2:
                            raise CmpErrorCode(name, tokens1[i1].offset, tokens1[i1],
                                      tokens2[i2], tokens1, tokens2)
                    else:
                        #import pdb; pdb.set_trace()
                        if dest1 in check_jumps:
                            check_jumps[dest1].append((i1,i2,dest2))
                        else:
                            check_jumps[dest1] = [(i1,i2,dest2)]
                i1 += 1
                i2 += 1
            del tokens1, tokens2 # save memory
        elif member == 'co_consts':
            # partial optimization can make the co_consts look different,
            # so we'll just compare the code consts
            codes1 = ( c for c in code_obj1.co_consts if type(c) == types.CodeType )
            codes2 = ( c for c in code_obj2.co_consts if type(c) == types.CodeType )
            for c1, c2 in zip(codes1, codes2):
                cmp_code_objects(version, c1, c2, name=name)
        else:
            # all other members must be equal
            if getattr(code_obj1, member) != getattr(code_obj2, member):
                raise CmpErrorMember(name, member,
                         getattr(code_obj1,member),
                         getattr(code_obj2,member))
class Token(Scanner.Token):
    """Token class with changed semantics for 'cmp()'."""
    def __cmp__(self, o):
        # Relaxed comparison: several token pairs that differ
        # syntactically are semantically equivalent for verification
        # purposes and compare equal (return 0).
        # NOTE(review): relies on the module-global JUMP_OPs having been
        # set by cmp_code_objects(); 't in JUMP_OPs' raises TypeError
        # while it is still None.
        t = self.type # shortcut
        loads = ('LOAD_NAME', 'LOAD_GLOBAL', 'LOAD_CONST')
        if t in loads and o.type in loads:
            # The name 'None' and the constant None are interchangeable.
            if self.pattr == 'None' and o.pattr == None:
                return 0
        if t == 'BUILD_TUPLE_0' and o.type == 'LOAD_CONST' and o.pattr == ():
            return 0
        if t == 'COME_FROM' == o.type:
            return 0
        if t == 'PRINT_ITEM_CONT' and o.type == 'PRINT_ITEM':
            return 0
        if t == 'RETURN_VALUE' and o.type == 'RETURN_END_IF':
            return 0
        if t == 'JUMP_IF_FALSE_OR_POP' and o.type == 'POP_JUMP_IF_FALSE':
            return 0
        if t in JUMP_OPs:
            # ignore offset
            return cmp(t, o.type)
        return cmp(t, o.type) or cmp(self.pattr, o.pattr)
    def __repr__(self):
        return '%s %s (%s)' % (str(self.type), str(self.attr),
                               repr(self.pattr))
    def __str__(self):
        return '%s\t%-17s %r' % (self.offset, self.type, self.pattr)
def compare_code_with_srcfile(pyc_filename, src_filename):
    """Compare a .pyc with a source code file.

    Raises a VerifyCmpError subclass on any mismatch; returns None
    when the byte-codes agree.
    """
    version, pyc_code = uncompyle2._load_module(pyc_filename)
    src_code = uncompyle2._load_file(src_filename)
    cmp_code_objects(version, pyc_code, src_code)
def compare_files(pyc_filename1, pyc_filename2):
    """Compare two .pyc files; raises a VerifyCmpError subclass on
    mismatch.  The second file's version selects the scanner, matching
    the original's last-assignment-wins behavior."""
    version1, code1 = uncompyle2._load_module(pyc_filename1)
    version2, code2 = uncompyle2._load_module(pyc_filename2)
    cmp_code_objects(version2, code1, code2)
if __name__ == '__main__':
    # Smoke test: two LOAD_CONST tokens for the same code object at
    # different offsets should compare equal under the relaxed __cmp__.
    # NOTE(review): JUMP_OPs is still None at this point, so
    # Token.__cmp__ reaches 't in JUMP_OPs' and looks like it would
    # raise TypeError — confirm this demo actually runs.
    t1 = Token('LOAD_CONST', None, 'code_object _expandLang', 52)
    t2 = Token('LOAD_CONST', -421, 'code_object _expandLang', 55)
    print `t1`
    print `t2`
    print cmp(t1, t2), cmp(t1.type, t2.type), cmp(t1.attr, t2.attr)