marshal.py: Python2 marshal code shouldn't try to turn a code object

into a string. parse3.py: handle both keyword and positional function
calls. scanner34.py: Remove extra level of quoting in LOAD_CONST.
Keyword handling now works across Python 2/3. Some other spelling and doc fixes.
This commit is contained in:
rocky
2015-12-18 21:15:54 -05:00
parent 347219a009
commit 6bc425b45e
12 changed files with 63 additions and 44 deletions

View File

@@ -1,7 +1,4 @@
from __future__ import print_function
'''
"""
Copyright (c) 1999 John Aycock
Copyright (c) 2000 by hartmut Goebel <h.goebel@crazy-compilers.com>
Copyright (c) 2015 by Rocky Bernstein
@@ -25,11 +22,11 @@ from __future__ import print_function
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
See the file 'CHANGES' for a list of changes
NB. This is not a masterpiece of software, but became more like a hack.
Probably a complete rewrite would be sensible. hG/2000-12-27
'''
"""
from __future__ import print_function
import imp, os, marshal, sys, types

View File

@@ -44,7 +44,7 @@ def load_code(fp, magic_int):
internStrings = []
return load_code_internal(fp, magic_int)
def load_code_internal(fp, magic_int):
def load_code_internal(fp, magic_int, bytes_for_s=False):
global internStrings
marshalType = fp.read(1).decode('utf-8')
@@ -62,7 +62,8 @@ def load_code_internal(fp, magic_int):
# a range here.
if 3000 < magic_int < 20121:
fp.read(4)
co_code = load_code_internal(fp, magic_int)
co_code = load_code_internal(fp, magic_int, bytes_for_s=True)
co_consts = load_code_internal(fp, magic_int)
co_names = load_code_internal(fp, magic_int)
co_varnames = load_code_internal(fp, magic_int)
@@ -81,13 +82,13 @@ def load_code_internal(fp, magic_int):
# In later Python3 magic_ints, there is a
# kwonlyargcount parameter which we set to 0.
return Code(co_argcount, 0, co_nlocals, co_stacksize, co_flags,
bytes(co_code, encoding='utf-8'),
co_code,
co_consts, co_names, co_varnames, co_filename, co_name,
co_firstlineno, bytes(co_lnotab, encoding='utf-8'),
co_freevars, co_cellvars)
else:
return Code(co_argcount, 0, co_nlocals, co_stacksize, co_flags,
bytes(co_code, encoding='utf-8'),
co_code,
co_consts, co_names, co_varnames, co_filename, co_name,
co_firstlineno, bytes(co_lnotab, encoding='utf-8'),
co_freevars, co_cellvars)
@@ -144,7 +145,10 @@ def load_code_internal(fp, magic_int):
return internStrings[refnum]
elif marshalType == 's':
strsize = unpack('i', fp.read(4))[0]
return compat_str(fp.read(strsize))
s = fp.read(strsize)
if not bytes_for_s:
s = compat_str(s)
return s
elif marshalType == 't':
strsize = unpack('i', fp.read(4))[0]
interned = compat_str(fp.read(strsize))

View File

@@ -647,29 +647,42 @@ class Python3Parser(PythonParser):
'''
def add_custom_rules(self, tokens, customize):
"""
Special handling for opcodes that take a variable number
of arguments -- we add a new rule for each:
expr ::= {expr}^n BUILD_LIST_n
expr ::= {expr}^n BUILD_TUPLE_n
unpack_list ::= UNPACK_LIST {expr}^n
unpack ::= UNPACK_TUPLE {expr}^n
unpack ::= UNPACK_SEQUENCE {expr}^n
mkfunc ::= {expr}^n LOAD_CONST MAKE_FUNCTION_n
mkfunc ::= {expr}^n load_closure LOAD_CONST MAKE_FUNCTION_n
expr ::= expr {expr}^n CALL_FUNCTION_n
expr ::= expr {expr}^n CALL_FUNCTION_VAR_n POP_TOP
expr ::= expr {expr}^n CALL_FUNCTION_VAR_KW_n POP_TOP
expr ::= expr {expr}^n CALL_FUNCTION_KW_n POP_TOP
"""
new_rules = set()
for token in tokens:
if token.type != 'CALL_FUNCTION':
if token.type not in ('CALL_FUNCTION', 'CALL_FUNCTION_VAR',
'CALL_FUNCTION_VAR_KW', 'CALL_FUNCTION_KW'):
continue
# Low byte indicates number of positional parameters,
# high byte number of keyword parameters
args_pos = token.attr & 0xff
args_kw = (token.attr >> 8) & 0xff
pos_args_line = '' if args_pos == 0 else ' {}'.format(' '.join('expr' for _ in range(args_pos)))
kw_args_line = '' if args_kw == 0 else ' {}'.format(' '.join('kwarg' for _ in range(args_kw)))
if args_kw == 0:
token.type = 'CALL_FUNCTION_%i' % (args_pos)
rule = ('call_function ::= expr%s%s %s' %
(pos_args_line, kw_args_line, token.type))
# Make sure we do not add the same rule twice
if rule not in new_rules:
new_rules.add(rule)
self.addRule(rule, nop_func)
customize[token.type] = args_pos
pass
else:
assert False, "Can't handle kw args yet"
new_rules.difference_update(self.added_rules)
for rule in new_rules:
self.addRule(rule, nop_func)
self.added_rules.update(new_rules)
nak = ( len(token.type)-len('CALL_FUNCTION') ) // 3
token.type = 'CALL_FUNCTION_%i' % token.attr
rule = ('call_function ::= expr '
+ ('expr ' * args_pos)
+ ('kwarg ' * args_kw)
+ 'expr ' * nak + token.type)
# Make sure we do not add the same rule twice
if rule not in new_rules:
new_rules.add(rule)
self.addRule(rule, nop_func)
customize[token.type] = args_pos
pass
pass
return

View File

@@ -327,6 +327,9 @@ class Scanner(object):
def get_scanner(version):
# Pick up appropriate scanner
# from trepan.api import debug;
# debug(start_opts={'startup-profile': True})
if version == 2.7:
import uncompyle6.scanners.scanner27 as scan
scanner = scan.Scanner27()

View File

@@ -4,7 +4,6 @@
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
# Copyright (c) 2015 by Rocky Bernstein
#
# See main module for license.
"""
Python 2.5 bytecode scanner/deparser
@@ -163,9 +162,9 @@ class Scanner25(scan.Scanner):
op_name = 'LOAD_DICTCOMP'
elif const.co_name == '<setcomp>':
op_name = 'LOAD_SETCOMP'
# verify uses 'pattr' for comparism, since 'attr'
# verify uses 'pattr' for comparison, since 'attr'
# now holds Code(const) and thus can not be used
# for comparism (todo: think about changing this)
# for comparison (todo: think about changing this)
# pattr = 'code_object @ 0x%x %s->%s' %
# (id(const), const.co_filename, const.co_name)
pattr = '<code_object ' + const.co_name + '>'

View File

@@ -2,8 +2,6 @@
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
# Copyright (c) 2015 by Rocky Bernstein
#
# See main module for license.
"""
Python 2.6 bytecode scanner
@@ -158,9 +156,9 @@ class Scanner26(scan.Scanner):
op_name = 'LOAD_DICTCOMP'
elif const.co_name == '<setcomp>':
op_name = 'LOAD_SETCOMP'
# verify uses 'pattr' for comparism, since 'attr'
# verify uses 'pattr' for comparison, since 'attr'
# now holds Code(const) and thus can not be used
# for comparism (todo: think about changing this)
# for comparison (todo: think about changing this)
# pattr = 'code_object @ 0x%x %s->%s' %\
# (id(const), const.co_filename, const.co_name)
pattr = '<code_object ' + const.co_name + '>'

View File

@@ -34,6 +34,7 @@ class Scanner27(scan.Scanner):
customize = {}
Token = self.Token # shortcut
self.code = array('B', co.co_code)
for i in self.op_range(0, len(self.code)):
if self.code[i] in (RETURN_VALUE, END_FINALLY):
n = i + 1
@@ -127,6 +128,7 @@ class Scanner27(scan.Scanner):
op = self.code[offset]
op_name = opname[op]
oparg = None; pattr = None
if op >= HAVE_ARGUMENT:
oparg = self.get_argument(offset) + extended_arg
@@ -147,9 +149,9 @@ class Scanner27(scan.Scanner):
op_name = 'LOAD_DICTCOMP'
elif const.co_name == '<setcomp>':
op_name = 'LOAD_SETCOMP'
# verify uses 'pattr' for comparism, since 'attr'
# verify() uses 'pattr' for comparison, since 'attr'
# now holds Code(const) and thus can not be used
# for comparism (todo: think about changing this)
# for comparison (todo: think about changing this)
# pattr = 'code_object @ 0x%x %s->%s' %\
# (id(const), const.co_filename, const.co_name)
pattr = '<code_object ' + const.co_name + '>'

View File

@@ -2,8 +2,6 @@
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
# Copyright (c) 2015 by Rocky Bernstein
#
# See main module for license.
"""
Python 3.2 bytecode scanner/deparser

View File

@@ -60,11 +60,16 @@ class Scanner34(scan.Scanner):
jump_idx += 1
pass
pass
# For constants, pattr is the same as attr (the raw argval). Using argrepr adds
# an extra level of quotes which messes other things up, like getting
# keyword attribute names in a call. I suspect there will be things
# other than LOAD_CONST, but we'll start out with just this for now.
pattr = inst.argval if inst.opname in ['LOAD_CONST'] else inst.argrepr
tokens.append(
Token(
type_ = inst.opname,
attr = inst.argval,
pattr = inst.argrepr,
pattr = pattr,
offset = inst.offset,
linestart = inst.starts_line,
)