Files
python-uncompyle6/uncompyle6/parser.py
2016-07-27 13:19:42 -04:00

715 lines
23 KiB
Python

# Copyright (c) 2015-2016 Rocky Bernstein
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
# Copyright (c) 1999 John Aycock
"""
Common uncompyle parser routines.
"""
from __future__ import print_function
import sys
from xdis.code import iscode
from spark_parser import GenericASTBuilder, DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG
from uncompyle6.show import maybe_show_asm
class ParserError(Exception):
def __init__(self, token, offset):
self.token = token
self.offset = offset
def __str__(self):
return "Parse error at or near `%r' instruction at offset %s\n" % \
(self.token, self.offset)
nop_func = lambda self, args: None
class PythonParser(GenericASTBuilder):
def add_unique_rule(self, rule, opname, count, customize):
"""Add rule to grammar, but only if it hasn't been added previously
opname and count are used in the customize() semantic the actions
to add the semantic action rule. Often, count is not used.
"""
if rule not in self.new_rules:
# print("XXX ", rule) # debug
self.new_rules.add(rule)
self.addRule(rule, nop_func)
customize[opname] = count
pass
return
def add_unique_rules(self, rules, customize):
"""Add rules to grammar
"""
for rule in rules:
opname = rule.split('::=')[0].strip()
self.add_unique_rule(rule, opname, 0, customize)
return
def cleanup(self):
"""
Remove recursive references to allow garbage
collector to collect this object.
"""
for dict in (self.rule2func, self.rules, self.rule2name):
for i in list(dict.keys()):
dict[i] = None
for i in dir(self):
setattr(self, i, None)
def error(self, instructions, index):
# Find the last line boundary
for start in range(index, -1, -1):
if instructions[start].linestart: break
pass
for finish in range(index+1, len(instructions)):
if instructions[finish].linestart: break
pass
err_token = instructions[index]
print("Instruction context:")
for i in range(start, finish):
indent = ' ' if i != index else '-> '
print("%s%s" % (indent, instructions[i]))
raise ParserError(err_token, err_token.offset)
def typestring(self, token):
return token.type
def nonterminal(self, nt, args):
collect = ('stmts', 'exprlist', 'kvlist', '_stmts', 'print_items', 'kwargs',
# PYPY:
'kvlist_n')
if nt in collect and len(args) > 1:
#
# Collect iterated thingies together. That is rather than
# stmts -> stmts stmt -> stmts stmt -> ...
# stmms -> stmt stmt ...
#
rv = args[0]
rv.append(args[1])
else:
rv = GenericASTBuilder.nonterminal(self, nt, args)
return rv
def __ambiguity(self, children):
# only for debugging! to be removed hG/2000-10-15
print(children)
return GenericASTBuilder.ambiguity(self, children)
def resolve(self, list):
if len(list) == 2 and 'funcdef' in list and 'assign' in list:
return 'funcdef'
if 'grammar' in list and 'expr' in list:
return 'expr'
# print >> sys.stderr, 'resolve', str(list)
return GenericASTBuilder.resolve(self, list)
##############################################
## Common Python 2 and Python 3 grammar rules
##############################################
def p_start(self, args):
'''
# The start or goal symbol
stmts ::= stmts sstmt
stmts ::= sstmt
'''
def p_call_stmt(self, args):
'''
# eval-mode compilation. Single-mode interactive compilation
# adds another rule.
call_stmt ::= expr POP_TOP
'''
def p_stmt(self, args):
"""
passstmt ::=
_stmts ::= _stmts stmt
_stmts ::= stmt
# statements with continue
c_stmts ::= _stmts
c_stmts ::= _stmts lastc_stmt
c_stmts ::= lastc_stmt
c_stmts ::= continue_stmts
lastc_stmt ::= iflaststmt
lastc_stmt ::= whileelselaststmt
lastc_stmt ::= forelselaststmt
lastc_stmt ::= ifelsestmtr
lastc_stmt ::= ifelsestmtc
lastc_stmt ::= tryelsestmtc
c_stmts_opt ::= c_stmts
c_stmts_opt ::= passstmt
l_stmts ::= _stmts
l_stmts ::= return_stmts
l_stmts ::= continue_stmts
l_stmts ::= _stmts lastl_stmt
l_stmts ::= lastl_stmt
lastl_stmt ::= iflaststmtl
lastl_stmt ::= ifelsestmtl
lastl_stmt ::= forelselaststmtl
lastl_stmt ::= tryelsestmtl
l_stmts_opt ::= l_stmts
l_stmts_opt ::= passstmt
suite_stmts ::= _stmts
suite_stmts ::= return_stmts
suite_stmts ::= continue_stmts
suite_stmts_opt ::= suite_stmts
suite_stmts_opt ::= passstmt
else_suite ::= suite_stmts
else_suitel ::= l_stmts
else_suitec ::= c_stmts
else_suitec ::= return_stmts
stmt ::= classdef
stmt ::= call_stmt
stmt ::= return_stmt
return_stmt ::= ret_expr RETURN_VALUE
return_stmts ::= return_stmt
return_stmts ::= _stmts return_stmt
"""
pass
def p_funcdef(self, args):
'''
stmt ::= funcdef
funcdef ::= mkfunc designator
stmt ::= funcdefdeco
funcdefdeco ::= mkfuncdeco designator
mkfuncdeco ::= expr mkfuncdeco CALL_FUNCTION_1
mkfuncdeco ::= expr mkfuncdeco0 CALL_FUNCTION_1
mkfuncdeco0 ::= mkfunc
load_closure ::= load_closure LOAD_CLOSURE
load_closure ::= LOAD_CLOSURE
'''
def p_genexpr(self, args):
'''
expr ::= genexpr
stmt ::= genexpr_func
genexpr_func ::= LOAD_FAST FOR_ITER designator comp_iter JUMP_BACK
'''
def p_jump(self, args):
"""
_jump ::= JUMP_ABSOLUTE
_jump ::= JUMP_FORWARD
_jump ::= JUMP_BACK
# Zero or more COME_FROMs
# loops can have this
_come_from ::= _come_from COME_FROM
_come_from ::=
# Zero or one COME_FROM
# And/or expressions have this
come_from_opt ::= COME_FROM
come_from_opt ::=
"""
def p_dictcomp(self, args):
'''
expr ::= dictcomp
stmt ::= dictcomp_func
dictcomp_func ::= BUILD_MAP_0 LOAD_FAST FOR_ITER designator
comp_iter JUMP_BACK RETURN_VALUE RETURN_LAST
'''
def p_augmented_assign(self, args):
'''
stmt ::= augassign1
stmt ::= augassign2
augassign1 ::= expr expr inplace_op designator
augassign1 ::= expr expr inplace_op ROT_THREE STORE_SUBSCR
augassign1 ::= expr expr inplace_op ROT_TWO STORE_SLICE+0
augassign1 ::= expr expr inplace_op ROT_THREE STORE_SLICE+1
augassign1 ::= expr expr inplace_op ROT_THREE STORE_SLICE+2
augassign1 ::= expr expr inplace_op ROT_FOUR STORE_SLICE+3
augassign2 ::= expr DUP_TOP LOAD_ATTR expr
inplace_op ROT_TWO STORE_ATTR
inplace_op ::= INPLACE_ADD
inplace_op ::= INPLACE_SUBTRACT
inplace_op ::= INPLACE_MULTIPLY
inplace_op ::= INPLACE_DIVIDE
inplace_op ::= INPLACE_TRUE_DIVIDE
inplace_op ::= INPLACE_FLOOR_DIVIDE
inplace_op ::= INPLACE_MODULO
inplace_op ::= INPLACE_POWER
inplace_op ::= INPLACE_LSHIFT
inplace_op ::= INPLACE_RSHIFT
inplace_op ::= INPLACE_AND
inplace_op ::= INPLACE_XOR
inplace_op ::= INPLACE_OR
'''
def p_assign(self, args):
'''
stmt ::= assign
assign ::= expr DUP_TOP designList
assign ::= expr designator
stmt ::= assign2
stmt ::= assign3
assign2 ::= expr expr ROT_TWO designator designator
assign3 ::= expr expr expr ROT_THREE ROT_TWO designator designator designator
'''
def p_forstmt(self, args):
"""
_for ::= GET_ITER FOR_ITER
_for ::= LOAD_CONST FOR_LOOP
for_block ::= l_stmts_opt _come_from JUMP_BACK
for_block ::= return_stmts _come_from
forstmt ::= SETUP_LOOP expr _for designator
for_block POP_BLOCK _come_from
forelsestmt ::= SETUP_LOOP expr _for designator
for_block POP_BLOCK else_suite _come_from
forelselaststmt ::= SETUP_LOOP expr _for designator
for_block POP_BLOCK else_suitec _come_from
forelselaststmtl ::= SETUP_LOOP expr _for designator
for_block POP_BLOCK else_suitel _come_from
"""
def p_whilestmt(self, args):
"""
whilestmt ::= SETUP_LOOP
testexpr
l_stmts_opt JUMP_BACK
POP_BLOCK _come_from
whilestmt ::= SETUP_LOOP
testexpr
return_stmts
POP_BLOCK COME_FROM
while1stmt ::= SETUP_LOOP l_stmts JUMP_BACK COME_FROM
while1stmt ::= SETUP_LOOP l_stmts JUMP_BACK POP_BLOCK COME_FROM
while1elsestmt ::= SETUP_LOOP l_stmts JUMP_BACK else_suite COME_FROM
whileelsestmt ::= SETUP_LOOP testexpr
l_stmts_opt JUMP_BACK
POP_BLOCK
else_suite COME_FROM
whileelselaststmt ::= SETUP_LOOP testexpr
l_stmts_opt JUMP_BACK
POP_BLOCK
else_suitec COME_FROM
"""
def p_import20(self, args):
'''
stmt ::= importstmt
stmt ::= importfrom
stmt ::= importstar
stmt ::= importmultiple
importlist2 ::= importlist2 import_as
importlist2 ::= import_as
import_as ::= IMPORT_NAME designator
import_as ::= IMPORT_NAME load_attrs designator
import_as ::= IMPORT_FROM designator
importstmt ::= LOAD_CONST LOAD_CONST import_as
importstar ::= LOAD_CONST LOAD_CONST IMPORT_NAME IMPORT_STAR
importfrom ::= LOAD_CONST LOAD_CONST IMPORT_NAME importlist2 POP_TOP
importmultiple ::= LOAD_CONST LOAD_CONST import_as imports_cont
imports_cont ::= imports_cont import_cont
imports_cont ::= import_cont
import_cont ::= LOAD_CONST LOAD_CONST import_as_cont
import_as_cont ::= IMPORT_NAME_CONT designator
import_as_cont ::= IMPORT_NAME_CONT load_attrs designator
import_as_cont ::= IMPORT_FROM designator
load_attrs ::= LOAD_ATTR
load_attrs ::= load_attrs LOAD_ATTR
'''
def p_list_comprehension(self, args):
"""
expr ::= list_compr
list_compr ::= BUILD_LIST_0 list_iter
list_iter ::= list_for
list_iter ::= list_if
list_iter ::= list_if_not
list_iter ::= lc_body
list_if ::= expr jmp_false list_iter
list_if_not ::= expr jmp_true list_iter
lc_body ::= expr LIST_APPEND
"""
def p_setcomp(self, args):
"""
expr ::= setcomp
setcomp ::= LOAD_SETCOMP MAKE_FUNCTION_0 expr GET_ITER CALL_FUNCTION_1
stmt ::= setcomp_func
setcomp_func ::= BUILD_SET_0 LOAD_FAST FOR_ITER designator comp_iter
JUMP_BACK RETURN_VALUE RETURN_LAST
comp_iter ::= comp_if
comp_iter ::= comp_ifnot
comp_iter ::= comp_for
comp_iter ::= comp_body
comp_body ::= set_comp_body
comp_body ::= gen_comp_body
comp_body ::= dict_comp_body
set_comp_body ::= expr SET_ADD
gen_comp_body ::= expr YIELD_VALUE POP_TOP
dict_comp_body ::= expr expr MAP_ADD
comp_if ::= expr jmp_false comp_iter
comp_ifnot ::= expr jmp_true comp_iter
comp_for ::= expr _for designator comp_iter JUMP_BACK
"""
def p_expr(self, args):
'''
expr ::= _mklambda
expr ::= SET_LINENO
expr ::= LOAD_FAST
expr ::= LOAD_NAME
expr ::= LOAD_CONST
expr ::= LOAD_GLOBAL
expr ::= LOAD_DEREF
expr ::= load_attr
expr ::= binary_expr
expr ::= build_list
expr ::= cmp
expr ::= mapexpr
expr ::= and
expr ::= and2
expr ::= or
expr ::= unary_expr
expr ::= call_function
expr ::= unary_not
expr ::= unary_convert
expr ::= binary_subscr
expr ::= binary_subscr2
expr ::= load_attr
expr ::= get_iter
expr ::= slice0
expr ::= slice1
expr ::= slice2
expr ::= slice3
expr ::= buildslice2
expr ::= buildslice3
expr ::= yield
binary_expr ::= expr expr binary_op
binary_op ::= BINARY_ADD
binary_op ::= BINARY_MULTIPLY
binary_op ::= BINARY_AND
binary_op ::= BINARY_OR
binary_op ::= BINARY_XOR
binary_op ::= BINARY_SUBTRACT
binary_op ::= BINARY_DIVIDE
binary_op ::= BINARY_TRUE_DIVIDE
binary_op ::= BINARY_FLOOR_DIVIDE
binary_op ::= BINARY_MODULO
binary_op ::= BINARY_LSHIFT
binary_op ::= BINARY_RSHIFT
binary_op ::= BINARY_POWER
unary_expr ::= expr unary_op
unary_op ::= UNARY_POSITIVE
unary_op ::= UNARY_NEGATIVE
unary_op ::= UNARY_INVERT
unary_not ::= expr UNARY_NOT
unary_convert ::= expr UNARY_CONVERT
binary_subscr ::= expr expr BINARY_SUBSCR
binary_subscr2 ::= expr expr DUP_TOPX_2 BINARY_SUBSCR
load_attr ::= expr LOAD_ATTR
get_iter ::= expr GET_ITER
slice0 ::= expr SLICE+0
slice0 ::= expr DUP_TOP SLICE+0
slice1 ::= expr expr SLICE+1
slice1 ::= expr expr DUP_TOPX_2 SLICE+1
slice2 ::= expr expr SLICE+2
slice2 ::= expr expr DUP_TOPX_2 SLICE+2
slice3 ::= expr expr expr SLICE+3
slice3 ::= expr expr expr DUP_TOPX_3 SLICE+3
buildslice3 ::= expr expr expr BUILD_SLICE_3
buildslice2 ::= expr expr BUILD_SLICE_2
yield ::= expr YIELD_VALUE
_mklambda ::= load_closure mklambda
_mklambda ::= mklambda
# Note: Python < 2.7 doesn't have *POP* or this. Remove from here?
# FIXME: segregate 2.7+
or ::= expr JUMP_IF_TRUE_OR_POP expr COME_FROM
and ::= expr JUMP_IF_FALSE_OR_POP expr COME_FROM
or ::= expr jmp_true expr come_from_opt
and ::= expr jmp_false expr come_from_opt
and2 ::= _jump jmp_false COME_FROM expr COME_FROM
expr ::= conditional
conditional ::= expr jmp_false expr JUMP_FORWARD expr COME_FROM
conditional ::= expr jmp_false expr JUMP_ABSOLUTE expr
expr ::= conditionalnot
conditionalnot ::= expr jmp_true expr _jump expr COME_FROM
ret_expr ::= expr
ret_expr ::= ret_and
ret_expr ::= ret_or
ret_expr_or_cond ::= ret_expr
ret_expr_or_cond ::= ret_cond
ret_expr_or_cond ::= ret_cond_not
# Note: Python < 2.7 doesn't have *POP* or this. Remove from here?
# FIXME: segregate 2.7+
ret_and ::= expr JUMP_IF_FALSE_OR_POP ret_expr_or_cond COME_FROM
ret_or ::= expr JUMP_IF_TRUE_OR_POP ret_expr_or_cond COME_FROM
ret_cond ::= expr POP_JUMP_IF_FALSE expr RETURN_END_IF ret_expr_or_cond
ret_cond_not ::= expr POP_JUMP_IF_TRUE expr RETURN_END_IF ret_expr_or_cond
stmt ::= return_lambda
stmt ::= conditional_lambda
return_lambda ::= ret_expr RETURN_VALUE LAMBDA_MARKER
conditional_lambda ::= expr jmp_false return_if_stmt return_stmt LAMBDA_MARKER
cmp ::= cmp_list
cmp ::= compare
compare ::= expr expr COMPARE_OP
cmp_list ::= expr cmp_list1 ROT_TWO POP_TOP
_come_from
cmp_list1 ::= expr DUP_TOP ROT_THREE
COMPARE_OP JUMP_IF_FALSE_OR_POP
cmp_list1 COME_FROM
cmp_list1 ::= expr DUP_TOP ROT_THREE
COMPARE_OP jmp_false
cmp_list1 _come_from
cmp_list1 ::= expr DUP_TOP ROT_THREE
COMPARE_OP JUMP_IF_FALSE_OR_POP
cmp_list2 COME_FROM
cmp_list1 ::= expr DUP_TOP ROT_THREE
COMPARE_OP jmp_false
cmp_list2 _come_from
cmp_list2 ::= expr COMPARE_OP JUMP_FORWARD
cmp_list2 ::= expr COMPARE_OP RETURN_VALUE
mapexpr ::= BUILD_MAP kvlist
kvlist ::= kvlist kv
kvlist ::= kvlist kv2
kvlist ::= kvlist kv3
kvlist ::=
kv ::= DUP_TOP expr ROT_TWO expr STORE_SUBSCR
kv2 ::= DUP_TOP expr expr ROT_THREE STORE_SUBSCR
kv3 ::= expr expr STORE_MAP
exprlist ::= exprlist expr
exprlist ::= expr
# Positional arguments in make_function
pos_arg ::= expr
expr32 ::= expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr
expr1024 ::= expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32
'''
def p_designator(self, args):
'''
# Note. The below is right-recursive:
designList ::= designator designator
designList ::= designator DUP_TOP designList
## Can we replace with left-recursive, and redo with:
##
## designList ::= designLists designator designator
## designLists ::= designLists designator DUP_TOP
## designLists ::=
## Will need to redo semantic actiion
designator ::= STORE_FAST
designator ::= STORE_NAME
designator ::= STORE_GLOBAL
designator ::= STORE_DEREF
designator ::= expr STORE_ATTR
designator ::= expr STORE_SLICE+0
designator ::= expr expr STORE_SLICE+1
designator ::= expr expr STORE_SLICE+2
designator ::= expr expr expr STORE_SLICE+3
designator ::= store_subscr
store_subscr ::= expr expr STORE_SUBSCR
designator ::= unpack
designator ::= unpack_list
'''
def parse(p, tokens, customize):
p.add_custom_rules(tokens, customize)
ast = p.parse(tokens)
# p.cleanup()
return ast
def get_python_parser(
version, debug_parser={}, compile_mode='exec',
is_pypy = False):
"""Returns parser object for Python version 2 or 3, 3.2, 3.5on,
etc., depending on the parameters passed. *compile_mode* is either
'exec', 'eval', or 'single'. See
https://docs.python.org/3.6/library/functions.html#compile for an
explanation of the different modes.
"""
# FIXME: there has to be a better way...
if version < 3.0:
if version == 2.3:
import uncompyle6.parsers.parse23 as parse23
if compile_mode == 'exec':
p = parse23.Python23Parser(debug_parser)
else:
p = parse23.Python23ParserSingle(debug_parser)
elif version == 2.4:
import uncompyle6.parsers.parse24 as parse24
if compile_mode == 'exec':
p = parse24.Python24Parser(debug_parser)
else:
p = parse24.Python24ParserSingle(debug_parser)
elif version == 2.5:
import uncompyle6.parsers.parse25 as parse25
if compile_mode == 'exec':
p = parse25.Python25Parser(debug_parser)
else:
p = parse25.Python25ParserSingle(debug_parser)
elif version == 2.6:
import uncompyle6.parsers.parse26 as parse26
if compile_mode == 'exec':
p = parse26.Python26Parser(debug_parser)
else:
p = parse26.Python26ParserSingle(debug_parser)
elif version == 2.7:
import uncompyle6.parsers.parse27 as parse27
if compile_mode == 'exec':
p = parse27.Python27Parser(debug_parser)
else:
p = parse27.Python27ParserSingle(debug_parser)
else:
import uncompyle6.parsers.parse2 as parse2
if compile_mode == 'exec':
p = parse2.Python2Parser(debug_parser)
else:
p = parse2.Python2ParserSingle(debug_parser)
pass
pass
pass
else:
import uncompyle6.parsers.parse3 as parse3
if version == 3.2:
if compile_mode == 'exec':
p = parse3.Python32Parser(debug_parser)
else:
p = parse3.Python32ParserSingle(debug_parser)
elif version == 3.3:
if compile_mode == 'exec':
p = parse3.Python33Parser(debug_parser)
else:
p = parse3.Python33ParserSingle(debug_parser)
elif version == 3.4:
import uncompyle6.parsers.parse34 as parse34
if compile_mode == 'exec':
p = parse34.Python34Parser(debug_parser)
else:
p = parse34.Python34ParserSingle(debug_parser)
elif version >= 3.5:
if compile_mode == 'exec':
p = parse3.Python35onParser(debug_parser)
else:
p = parse3.Python35onParserSingle(debug_parser)
else:
if compile_mode == 'exec':
p = parse3.Python3Parser(debug_parser)
else:
p = parse3.Python3ParserSingle(debug_parser)
p.version = version
# p.dumpGrammar() # debug
return p
class PythonParserSingle(PythonParser):
def p_call_stmt_single(self, args):
'''
# single-mode compilation. Eval-mode interactive compilation
# drops the last rule.
call_stmt ::= expr PRINT_EXPR
'''
def python_parser(version, co, out=sys.stdout, showasm=False,
parser_debug=PARSER_DEFAULT_DEBUG, is_pypy=False):
"""
Parse a code object to an abstract syntax tree representation.
:param version: The python version this code is from as a float, for
example 2.6, 2.7, 3.2, 3.3, 3.4, 3.5 etc.
:param co: The code object to parse.
:param out: File like object to write the output to.
:param showasm: Flag which determines whether the disassembled code
is written to sys.stdout or not. (It is also to
pass a file like object, into which the asm will be
written).
:param parser_debug: dict containing debug flags for the spark parser.
:return: Abstract syntax tree representation of the code object.
"""
assert iscode(co)
from uncompyle6.scanner import get_scanner
scanner = get_scanner(version, is_pypy)
tokens, customize = scanner.disassemble(co)
maybe_show_asm(showasm, tokens)
# For heavy grammar debugging
parser_debug = {'rules': True, 'transition': True, 'reduce' : True,
'showstack': 'full'}
p = get_python_parser(version, parser_debug)
return parse(p, tokens, customize)
if __name__ == '__main__':
def parse_test(co):
from uncompyle6 import PYTHON_VERSION, IS_PYPY
ast = python_parser(PYTHON_VERSION, co, showasm=True, is_pypy=IS_PYPY)
print(ast)
return
parse_test(parse_test.__code__)