marshal.py: Python2 marshal code shouldn't try to turn a code object

into a string. parse3.py: handle both keyword and positional function
calls. scanner34.py: Remove extra level of quoting in LOAD_CONST.
Keyword handling now works across Python 2/3. Some other spelling and doc fixes.
This commit is contained in:
rocky
2015-12-18 21:15:54 -05:00
parent 347219a009
commit 6bc425b45e
12 changed files with 63 additions and 44 deletions

View File

@@ -16,7 +16,7 @@ check-2.7: check-short-2.7 check-bytecode check-2.7-ok
check-3.4: check-short-2.7 check-bytecode check-native-short check-3.4: check-short-2.7 check-bytecode check-native-short
check: check:
@echo "For now, use check-2.7 or check.3.4" && false @echo "For now, use check-2.7 or check-3.4" && false
## FIXME: there is a bug in our code that I don't ## FIXME: there is a bug in our code that I don't
## find in uncompyle2 that causes this to fail. ## find in uncompyle2 that causes this to fail.

Binary file not shown.

Binary file not shown.

View File

@@ -1,7 +1,4 @@
from __future__ import print_function """
'''
Copyright (c) 1999 John Aycock Copyright (c) 1999 John Aycock
Copyright (c) 2000 by hartmut Goebel <h.goebel@crazy-compilers.com> Copyright (c) 2000 by hartmut Goebel <h.goebel@crazy-compilers.com>
Copyright (c) 2015 by Rocky Bernstein Copyright (c) 2015 by Rocky Bernstein
@@ -25,11 +22,11 @@ from __future__ import print_function
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
See the file 'CHANGES' for a list of changes
NB. This is not a masterpiece of software, but became more like a hack. NB. This is not a masterpiece of software, but became more like a hack.
Probably a complete rewrite would be sensefull. hG/2000-12-27 Probably a complete rewrite would be sensefull. hG/2000-12-27
''' """
from __future__ import print_function
import imp, os, marshal, sys, types import imp, os, marshal, sys, types

View File

@@ -44,7 +44,7 @@ def load_code(fp, magic_int):
internStrings = [] internStrings = []
return load_code_internal(fp, magic_int) return load_code_internal(fp, magic_int)
def load_code_internal(fp, magic_int): def load_code_internal(fp, magic_int, bytes_for_s=False):
global internStrings global internStrings
marshalType = fp.read(1).decode('utf-8') marshalType = fp.read(1).decode('utf-8')
@@ -62,7 +62,8 @@ def load_code_internal(fp, magic_int):
# a range here. # a range here.
if 3000 < magic_int < 20121: if 3000 < magic_int < 20121:
fp.read(4) fp.read(4)
co_code = load_code_internal(fp, magic_int)
co_code = load_code_internal(fp, magic_int, bytes_for_s=True)
co_consts = load_code_internal(fp, magic_int) co_consts = load_code_internal(fp, magic_int)
co_names = load_code_internal(fp, magic_int) co_names = load_code_internal(fp, magic_int)
co_varnames = load_code_internal(fp, magic_int) co_varnames = load_code_internal(fp, magic_int)
@@ -81,13 +82,13 @@ def load_code_internal(fp, magic_int):
# In later Python3 magic_ints, there is a # In later Python3 magic_ints, there is a
# kwonlyargcount parameter which we set to 0. # kwonlyargcount parameter which we set to 0.
return Code(co_argcount, 0, co_nlocals, co_stacksize, co_flags, return Code(co_argcount, 0, co_nlocals, co_stacksize, co_flags,
bytes(co_code, encoding='utf-8'), co_code,
co_consts, co_names, co_varnames, co_filename, co_name, co_consts, co_names, co_varnames, co_filename, co_name,
co_firstlineno, bytes(co_lnotab, encoding='utf-8'), co_firstlineno, bytes(co_lnotab, encoding='utf-8'),
co_freevars, co_cellvars) co_freevars, co_cellvars)
else: else:
return Code(co_argcount, 0, co_nlocals, co_stacksize, co_flags, return Code(co_argcount, 0, co_nlocals, co_stacksize, co_flags,
bytes(co_code, encoding='utf-8'), co_code,
co_consts, co_names, co_varnames, co_filename, co_name, co_consts, co_names, co_varnames, co_filename, co_name,
co_firstlineno, bytes(co_lnotab, encoding='utf-8'), co_firstlineno, bytes(co_lnotab, encoding='utf-8'),
co_freevars, co_cellvars) co_freevars, co_cellvars)
@@ -144,7 +145,10 @@ def load_code_internal(fp, magic_int):
return internStrings[refnum] return internStrings[refnum]
elif marshalType == 's': elif marshalType == 's':
strsize = unpack('i', fp.read(4))[0] strsize = unpack('i', fp.read(4))[0]
return compat_str(fp.read(strsize)) s = fp.read(strsize)
if not bytes_for_s:
s = compat_str(s)
return s
elif marshalType == 't': elif marshalType == 't':
strsize = unpack('i', fp.read(4))[0] strsize = unpack('i', fp.read(4))[0]
interned = compat_str(fp.read(strsize)) interned = compat_str(fp.read(strsize))

View File

@@ -647,29 +647,42 @@ class Python3Parser(PythonParser):
''' '''
def add_custom_rules(self, tokens, customize): def add_custom_rules(self, tokens, customize):
"""
Special handling for opcodes that take a variable number
of arguments -- we add a new rule for each:
expr ::= {expr}^n BUILD_LIST_n
expr ::= {expr}^n BUILD_TUPLE_n
unpack_list ::= UNPACK_LIST {expr}^n
unpack ::= UNPACK_TUPLE {expr}^n
unpack ::= UNPACK_SEQUENCE {expr}^n
mkfunc ::= {expr}^n LOAD_CONST MAKE_FUNCTION_n
mkfunc ::= {expr}^n load_closure LOAD_CONST MAKE_FUNCTION_n
expr ::= expr {expr}^n CALL_FUNCTION_n
expr ::= expr {expr}^n CALL_FUNCTION_VAR_n POP_TOP
expr ::= expr {expr}^n CALL_FUNCTION_VAR_KW_n POP_TOP
expr ::= expr {expr}^n CALL_FUNCTION_KW_n POP_TOP
"""
new_rules = set() new_rules = set()
for token in tokens: for token in tokens:
if token.type != 'CALL_FUNCTION': if token.type not in ('CALL_FUNCTION', 'CALL_FUNCTION_VAR',
'CALL_FUNCTION_VAR_KW', 'CALL_FUNCTION_KW'):
continue continue
# Low byte indicates number of positional parameters, # Low byte indicates number of positional parameters,
# high byte number of keyword parameters # high byte number of keyword parameters
args_pos = token.attr & 0xff args_pos = token.attr & 0xff
args_kw = (token.attr >> 8) & 0xff args_kw = (token.attr >> 8) & 0xff
pos_args_line = '' if args_pos == 0 else ' {}'.format(' '.join('expr' for _ in range(args_pos))) nak = ( len(token.type)-len('CALL_FUNCTION') ) // 3
kw_args_line = '' if args_kw == 0 else ' {}'.format(' '.join('kwarg' for _ in range(args_kw))) token.type = 'CALL_FUNCTION_%i' % token.attr
if args_kw == 0: rule = ('call_function ::= expr '
token.type = 'CALL_FUNCTION_%i' % (args_pos) + ('expr ' * args_pos)
rule = ('call_function ::= expr%s%s %s' % + ('kwarg ' * args_kw)
(pos_args_line, kw_args_line, token.type)) + 'expr ' * nak + token.type)
# Make sure we do not add the same rule twice # Make sure we do not add the same rule twice
if rule not in new_rules: if rule not in new_rules:
new_rules.add(rule) new_rules.add(rule)
self.addRule(rule, nop_func) self.addRule(rule, nop_func)
customize[token.type] = args_pos customize[token.type] = args_pos
pass pass
else: pass
assert False, "Can't handle kw args yet" return
new_rules.difference_update(self.added_rules)
for rule in new_rules:
self.addRule(rule, nop_func)
self.added_rules.update(new_rules)

View File

@@ -327,6 +327,9 @@ class Scanner(object):
def get_scanner(version): def get_scanner(version):
# Pick up appropriate scanner # Pick up appropriate scanner
# from trepan.api import debug;
# debug(start_opts={'startup-profile': True})
if version == 2.7: if version == 2.7:
import uncompyle6.scanners.scanner27 as scan import uncompyle6.scanners.scanner27 as scan
scanner = scan.Scanner27() scanner = scan.Scanner27()

View File

@@ -4,7 +4,6 @@
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org> # Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
# Copyright (c) 2015 by Rocky Bernstein # Copyright (c) 2015 by Rocky Bernstein
# #
# See main module for license.
""" """
Python 2.5 bytecode scanner/deparser Python 2.5 bytecode scanner/deparser
@@ -163,9 +162,9 @@ class Scanner25(scan.Scanner):
op_name = 'LOAD_DICTCOMP' op_name = 'LOAD_DICTCOMP'
elif const.co_name == '<setcomp>': elif const.co_name == '<setcomp>':
op_name = 'LOAD_SETCOMP' op_name = 'LOAD_SETCOMP'
# verify uses 'pattr' for comparism, since 'attr' # verify uses 'pattr' for comparison, since 'attr'
# now holds Code(const) and thus can not be used # now holds Code(const) and thus can not be used
# for comparism (todo: think about changing this) # for comparison (todo: think about changing this)
# pattr = 'code_object @ 0x%x %s->%s' % # pattr = 'code_object @ 0x%x %s->%s' %
# (id(const), const.co_filename, const.co_name) # (id(const), const.co_filename, const.co_name)
pattr = '<code_object ' + const.co_name + '>' pattr = '<code_object ' + const.co_name + '>'

View File

@@ -2,8 +2,6 @@
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com> # Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org> # Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
# Copyright (c) 2015 by Rocky Bernstein # Copyright (c) 2015 by Rocky Bernstein
#
# See main module for license.
""" """
Python 2.6 bytecode scanner Python 2.6 bytecode scanner
@@ -158,9 +156,9 @@ class Scanner26(scan.Scanner):
op_name = 'LOAD_DICTCOMP' op_name = 'LOAD_DICTCOMP'
elif const.co_name == '<setcomp>': elif const.co_name == '<setcomp>':
op_name = 'LOAD_SETCOMP' op_name = 'LOAD_SETCOMP'
# verify uses 'pattr' for comparism, since 'attr' # verify uses 'pattr' for comparison, since 'attr'
# now holds Code(const) and thus can not be used # now holds Code(const) and thus can not be used
# for comparism (todo: think about changing this) # for comparison (todo: think about changing this)
# pattr = 'code_object @ 0x%x %s->%s' %\ # pattr = 'code_object @ 0x%x %s->%s' %\
# (id(const), const.co_filename, const.co_name) # (id(const), const.co_filename, const.co_name)
pattr = '<code_object ' + const.co_name + '>' pattr = '<code_object ' + const.co_name + '>'

View File

@@ -34,6 +34,7 @@ class Scanner27(scan.Scanner):
customize = {} customize = {}
Token = self.Token # shortcut Token = self.Token # shortcut
self.code = array('B', co.co_code) self.code = array('B', co.co_code)
for i in self.op_range(0, len(self.code)): for i in self.op_range(0, len(self.code)):
if self.code[i] in (RETURN_VALUE, END_FINALLY): if self.code[i] in (RETURN_VALUE, END_FINALLY):
n = i + 1 n = i + 1
@@ -127,6 +128,7 @@ class Scanner27(scan.Scanner):
op = self.code[offset] op = self.code[offset]
op_name = opname[op] op_name = opname[op]
oparg = None; pattr = None oparg = None; pattr = None
if op >= HAVE_ARGUMENT: if op >= HAVE_ARGUMENT:
oparg = self.get_argument(offset) + extended_arg oparg = self.get_argument(offset) + extended_arg
@@ -147,9 +149,9 @@ class Scanner27(scan.Scanner):
op_name = 'LOAD_DICTCOMP' op_name = 'LOAD_DICTCOMP'
elif const.co_name == '<setcomp>': elif const.co_name == '<setcomp>':
op_name = 'LOAD_SETCOMP' op_name = 'LOAD_SETCOMP'
# verify uses 'pattr' for comparism, since 'attr' # verify() uses 'pattr' for comparison, since 'attr'
# now holds Code(const) and thus can not be used # now holds Code(const) and thus can not be used
# for comparism (todo: think about changing this) # for comparison (todo: think about changing this)
# pattr = 'code_object @ 0x%x %s->%s' %\ # pattr = 'code_object @ 0x%x %s->%s' %\
# (id(const), const.co_filename, const.co_name) # (id(const), const.co_filename, const.co_name)
pattr = '<code_object ' + const.co_name + '>' pattr = '<code_object ' + const.co_name + '>'

View File

@@ -2,8 +2,6 @@
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com> # Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org> # Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
# Copyright (c) 2015 by Rocky Bernstein # Copyright (c) 2015 by Rocky Bernstein
#
# See main module for license.
""" """
Python 3.2 bytecode scanner/deparser Python 3.2 bytecode scanner/deparser

View File

@@ -60,11 +60,16 @@ class Scanner34(scan.Scanner):
jump_idx += 1 jump_idx += 1
pass pass
pass pass
# For constants, the pattr is the same as attr. Using pattr adds
# an extra level of quotes which messes other things up, like getting
# keyword attribute names in a call. I suspect there will be things
# other than LOAD_CONST, but we'll start out with just this for now.
pattr = inst.argval if inst.opname in ['LOAD_CONST'] else inst.argrepr
tokens.append( tokens.append(
Token( Token(
type_ = inst.opname, type_ = inst.opname,
attr = inst.argval, attr = inst.argval,
pattr = inst.argrepr, pattr = pattr,
offset = inst.offset, offset = inst.offset,
linestart = inst.starts_line, linestart = inst.starts_line,
) )