# Copyright (c) 2015-2019 Rocky Bernstein # Copyright (c) 2005 by Dan Pascu # Copyright (c) 2000-2002 by hartmut Goebel # Copyright (c) 1999 John Aycock # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . """ Common uncompyle6 parser routines. """ from __future__ import print_function import sys from xdis.code import iscode from xdis.magics import py_str2float from spark_parser import GenericASTBuilder, DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG from uncompyle6.show import maybe_show_asm class ParserError(Exception): def __init__(self, token, offset): self.token = token self.offset = offset def __str__(self): return "Parse error at or near `%r' instruction at offset %s\n" % \ (self.token, self.offset) def nop_func(self, args): return None class PythonParser(GenericASTBuilder): def __init__(self, SyntaxTree, start, debug): super(PythonParser, self).__init__(SyntaxTree, start, debug) # FIXME: customize per python parser version # These are the non-terminals we should collect into a list. # For example instead of: # stmts -> stmts stmt -> stmts stmt stmt ... # collect as stmts -> stmt stmt ... nt_list = [ 'stmts', 'except_stmts', '_stmts', 'attributes', 'exprlist', 'kvlist', 'kwargs', 'come_froms', '_come_froms', 'importlist', # Python < 3 'print_items', # PyPy: 'imports_cont', 'kvlist_n', # Python 3.6+ 'come_from_loops', ] self.collect = frozenset(nt_list) # For these items we need to keep the 1st epslion reduction since # the nonterminal name is used in a semantic action. self.keep_epsilon = frozenset(('kvlist_n', 'kvlist')) # ??? Do we need a debug option to skip eliding singleton reductions? # Time will tell if it if useful in debugging # FIXME: optional_nt is a misnomer. It's really about there being a # singleton reduction that we can simplify. It also happens to be optional # in its other derivation self.optional_nt |= frozenset(('come_froms', 'suite_stmts', 'l_stmts_opt', 'c_stmts_opt')) # Reduce singleton reductions in these nonterminals: # FIXME: would love to do expr, sstmts, stmts and # so on but that would require major changes to the # semantic actions self.singleton = frozenset(('str', 'store', '_stmts', 'suite_stmts_opt', 'inplace_op')) # Instructions filled in from scanner self.insts = [] def ast_first_offset(self, ast): if hasattr(ast, 'offset'): return ast.offset else: return self.ast_first_offset(ast[0]) def add_unique_rule(self, rule, opname, arg_count, customize): """Add rule to grammar, but only if it hasn't been added previously opname and stack_count are used in the customize() semantic the actions to add the semantic action rule. Stack_count is used in custom opcodes like MAKE_FUNCTION to indicate how many arguments it has. Often it is not used. """ if rule not in self.new_rules: # print("XXX ", rule) # debug self.new_rules.add(rule) self.addRule(rule, nop_func) customize[opname] = arg_count pass return def add_unique_rules(self, rules, customize): """Add rules (a list of string) to grammar. Note that the rules must not be those that set arg_count in the custom dictionary. """ for rule in rules: if len(rule) == 0: continue opname = rule.split('::=')[0].strip() self.add_unique_rule(rule, opname, 0, customize) return def add_unique_doc_rules(self, rules_str, customize): """Add rules (a docstring-like list of rules) to grammar. Note that the rules must not be those that set arg_count in the custom dictionary. """ rules = [r.strip() for r in rules_str.split("\n")] self.add_unique_rules(rules, customize) return def cleanup(self): """ Remove recursive references to allow garbage collector to collect this object. """ for dict in (self.rule2func, self.rules, self.rule2name): for i in list(dict.keys()): dict[i] = None for i in dir(self): setattr(self, i, None) def debug_reduce(self, rule, tokens, parent, last_token_pos): """Customized format and print for our kind of tokens which gets called in debugging grammar reduce rules """ def fix(c): s = str(c) last_token_pos = s.find('_') if last_token_pos == -1: return s else: return s[:last_token_pos] prefix = '' if parent and tokens: p_token = tokens[parent] if hasattr(p_token, 'linestart') and p_token.linestart: prefix = 'L.%3d: ' % p_token.linestart else: prefix = ' ' if hasattr(p_token, 'offset'): prefix += "%3s" % fix(p_token.offset) if len(rule[1]) > 1: prefix += '-%-3s ' % fix(tokens[last_token_pos-1].offset) else: prefix += ' ' else: prefix = ' ' print("%s%s ::= %s (%d)" % (prefix, rule[0], ' '.join(rule[1]), last_token_pos)) def error(self, instructions, index): # Find the last line boundary start, finish = -1, -1 for start in range(index, -1, -1): if instructions[start].linestart: break pass for finish in range(index+1, len(instructions)): if instructions[finish].linestart: break pass if start > 0: err_token = instructions[index] print("Instruction context:") for i in range(start, finish): if i != index: indent = ' ' else: indent = '-> ' print("%s%s" % (indent, instructions[i])) raise ParserError(err_token, err_token.offset) else: raise ParserError(None, -1) def get_pos_kw(self, token): """Return then the number of positional parameters and represented by the attr field of token""" # Low byte indicates number of positional paramters, # high byte number of keyword parameters args_pos = token.attr & 0xff args_kw = (token.attr >> 8) & 0xff return args_pos, args_kw def nonterminal(self, nt, args): n = len(args) # # Use this to find lots of singleton rule # if n == 1 and nt not in self.singleton: # print("XXX", nt) if nt in self.collect and n > 1: # # Collect iterated thingies together. That is rather than # stmts -> stmts stmt -> stmts stmt -> ... # stmms -> stmt stmt ... # if not hasattr(args[0], 'append'): # Was in self.optional_nt as a single item, but we find we have # more than one now... rv = GenericASTBuilder.nonterminal(self, nt, [args[0]]) else: rv = args[0] pass # In a list-like entity where the first item goes to epsilon, # drop that and save the 2nd item as the first one if len(rv) == 0 and nt not in self.keep_epsilon: rv = args[1] else: rv.append(args[1]) elif n == 1 and args[0] in self.singleton: rv = GenericASTBuilder.nonterminal(self, nt, args[0]) del args[0] # save memory elif n == 1 and nt in self.optional_nt: rv = args[0] else: rv = GenericASTBuilder.nonterminal(self, nt, args) return rv def __ambiguity(self, children): # only for debugging! to be removed hG/2000-10-15 print(children) return GenericASTBuilder.ambiguity(self, children) def resolve(self, list): if len(list) == 2 and 'function_def' in list and 'assign' in list: return 'function_def' if 'grammar' in list and 'expr' in list: return 'expr' # print >> sys.stderr, 'resolve', str(list) return GenericASTBuilder.resolve(self, list) ############################################### # Common Python 2 and Python 3 grammar rules # ############################################### def p_start(self, args): ''' # The start or goal symbol stmts ::= sstmt+ ''' def p_call_stmt(self, args): ''' # eval-mode compilation. Single-mode interactive compilation # adds another rule. call_stmt ::= expr POP_TOP ''' def p_stmt(self, args): """ pass ::= _stmts ::= stmt+ # statements with continue c_stmts ::= _stmts c_stmts ::= _stmts lastc_stmt c_stmts ::= lastc_stmt c_stmts ::= continues lastc_stmt ::= iflaststmt lastc_stmt ::= forelselaststmt lastc_stmt ::= ifelsestmtc c_stmts_opt ::= c_stmts c_stmts_opt ::= pass # statements inside a loop l_stmts ::= _stmts l_stmts ::= returns l_stmts ::= continues l_stmts ::= _stmts lastl_stmt l_stmts ::= lastl_stmt lastl_stmt ::= iflaststmtl lastl_stmt ::= ifelsestmtl lastl_stmt ::= forelselaststmtl lastl_stmt ::= tryelsestmtl l_stmts_opt ::= l_stmts l_stmts_opt ::= pass suite_stmts ::= _stmts suite_stmts ::= returns suite_stmts ::= continues suite_stmts_opt ::= suite_stmts # passtmt is needed for semantic actions to add "pass" suite_stmts_opt ::= pass else_suite ::= suite_stmts else_suitel ::= l_stmts else_suitec ::= c_stmts else_suitec ::= returns stmt ::= assert stmt ::= classdef stmt ::= call_stmt stmt ::= ifstmt stmt ::= ifelsestmt stmt ::= whilestmt stmt ::= while1stmt stmt ::= whileelsestmt stmt ::= while1elsestmt stmt ::= for stmt ::= forelsestmt stmt ::= try_except stmt ::= tryelsestmt stmt ::= tryfinallystmt stmt ::= withstmt stmt ::= withasstmt stmt ::= del_stmt del_stmt ::= DELETE_FAST del_stmt ::= DELETE_NAME del_stmt ::= DELETE_GLOBAL stmt ::= return return ::= ret_expr RETURN_VALUE # "returns" nonterminal is a sequence of statements that ends in a RETURN statement. # In later Python versions with jump optimization, this can cause JUMPs # that would normally appear to be omitted. returns ::= return returns ::= _stmts return """ pass def p_function_def(self, args): ''' stmt ::= function_def function_def ::= mkfunc store stmt ::= function_def_deco function_def_deco ::= mkfuncdeco store mkfuncdeco ::= expr mkfuncdeco CALL_FUNCTION_1 mkfuncdeco ::= expr mkfuncdeco0 CALL_FUNCTION_1 mkfuncdeco0 ::= mkfunc load_closure ::= load_closure LOAD_CLOSURE load_closure ::= LOAD_CLOSURE ''' def p_generator_exp(self, args): ''' expr ::= generator_exp stmt ::= genexpr_func genexpr_func ::= LOAD_FAST FOR_ITER store comp_iter JUMP_BACK ''' def p_jump(self, args): """ _jump ::= JUMP_ABSOLUTE _jump ::= JUMP_FORWARD _jump ::= JUMP_BACK # Zero or more COME_FROMs - loops can have this _come_froms ::= COME_FROM* # One or more COME_FROMs - joins of tryelse's have this come_froms ::= COME_FROM+ # Zero or one COME_FROM # And/or expressions have this come_from_opt ::= COME_FROM? """ def p_augmented_assign(self, args): ''' stmt ::= aug_assign1 stmt ::= aug_assign2 # This is odd in that other aug_assign1's have only 3 slots # The store isn't used as that's supposed to be also # indicated in the first expr aug_assign1 ::= expr expr inplace_op store aug_assign1 ::= expr expr inplace_op ROT_THREE STORE_SUBSCR aug_assign2 ::= expr DUP_TOP LOAD_ATTR expr inplace_op ROT_TWO STORE_ATTR inplace_op ::= INPLACE_ADD inplace_op ::= INPLACE_SUBTRACT inplace_op ::= INPLACE_MULTIPLY inplace_op ::= INPLACE_TRUE_DIVIDE inplace_op ::= INPLACE_FLOOR_DIVIDE inplace_op ::= INPLACE_MODULO inplace_op ::= INPLACE_POWER inplace_op ::= INPLACE_LSHIFT inplace_op ::= INPLACE_RSHIFT inplace_op ::= INPLACE_AND inplace_op ::= INPLACE_XOR inplace_op ::= INPLACE_OR ''' def p_assign(self, args): ''' stmt ::= assign assign ::= expr DUP_TOP designList assign ::= expr store stmt ::= assign2 stmt ::= assign3 assign2 ::= expr expr ROT_TWO store store assign3 ::= expr expr expr ROT_THREE ROT_TWO store store store ''' def p_forstmt(self, args): """ for_iter ::= GET_ITER FOR_ITER for_block ::= l_stmts_opt _come_froms JUMP_BACK forelsestmt ::= SETUP_LOOP expr for_iter store for_block POP_BLOCK else_suite _come_froms forelselaststmt ::= SETUP_LOOP expr for_iter store for_block POP_BLOCK else_suitec _come_froms forelselaststmtl ::= SETUP_LOOP expr for_iter store for_block POP_BLOCK else_suitel _come_froms """ def p_import20(self, args): """ stmt ::= import stmt ::= import_from stmt ::= import_from_star stmt ::= importmultiple importlist ::= importlist alias importlist ::= alias alias ::= IMPORT_NAME store alias ::= IMPORT_FROM store alias ::= IMPORT_NAME attributes store import ::= LOAD_CONST LOAD_CONST alias import_from_star ::= LOAD_CONST LOAD_CONST IMPORT_NAME IMPORT_STAR import_from ::= LOAD_CONST LOAD_CONST IMPORT_NAME importlist POP_TOP importmultiple ::= LOAD_CONST LOAD_CONST alias imports_cont imports_cont ::= import_cont+ import_cont ::= LOAD_CONST LOAD_CONST alias attributes ::= LOAD_ATTR+ """ def p_list_comprehension(self, args): """ expr ::= list_comp list_iter ::= list_for list_iter ::= list_if list_iter ::= list_if_not list_iter ::= lc_body list_if ::= expr jmp_false list_iter list_if_not ::= expr jmp_true list_iter """ def p_set_comp(self, args): """ comp_iter ::= comp_for comp_iter ::= comp_body comp_body ::= gen_comp_body gen_comp_body ::= expr YIELD_VALUE POP_TOP comp_iter ::= comp_if comp_if ::= expr jmp_false comp_iter """ def p_expr(self, args): ''' expr ::= _mklambda expr ::= LOAD_FAST expr ::= LOAD_NAME expr ::= LOAD_CONST expr ::= LOAD_GLOBAL expr ::= LOAD_DEREF expr ::= binary_expr expr ::= list expr ::= compare expr ::= dict expr ::= and expr ::= or expr ::= unary_expr expr ::= call expr ::= unary_not expr ::= subscript expr ::= subscript2 expr ::= yield binary_expr ::= expr expr binary_op binary_op ::= BINARY_ADD binary_op ::= BINARY_MULTIPLY binary_op ::= BINARY_AND binary_op ::= BINARY_OR binary_op ::= BINARY_XOR binary_op ::= BINARY_SUBTRACT binary_op ::= BINARY_TRUE_DIVIDE binary_op ::= BINARY_FLOOR_DIVIDE binary_op ::= BINARY_MODULO binary_op ::= BINARY_LSHIFT binary_op ::= BINARY_RSHIFT binary_op ::= BINARY_POWER unary_expr ::= expr unary_op unary_op ::= UNARY_POSITIVE unary_op ::= UNARY_NEGATIVE unary_op ::= UNARY_INVERT unary_not ::= expr UNARY_NOT subscript ::= expr expr BINARY_SUBSCR attribute ::= expr LOAD_ATTR get_iter ::= expr GET_ITER yield ::= expr YIELD_VALUE _mklambda ::= mklambda expr ::= conditional ret_expr ::= expr ret_expr ::= ret_and ret_expr ::= ret_or ret_expr_or_cond ::= ret_expr ret_expr_or_cond ::= ret_cond stmt ::= return_lambda return_lambda ::= ret_expr RETURN_VALUE_LAMBDA LAMBDA_MARKER return_lambda ::= ret_expr RETURN_VALUE_LAMBDA compare ::= compare_chained compare ::= compare_single compare_single ::= expr expr COMPARE_OP # A compare_chained is two comparisions like x <= y <= z compare_chained ::= expr compare_chained1 ROT_TWO POP_TOP _come_froms compare_chained2 ::= expr COMPARE_OP JUMP_FORWARD # Non-null kvlist items are broken out in the indiviual grammars kvlist ::= # Positional arguments in make_function pos_arg ::= expr ''' def p_store(self, args): ''' # Note. The below is right-recursive: designList ::= store store designList ::= store DUP_TOP designList ## Can we replace with left-recursive, and redo with: ## ## designList ::= designLists store store ## designLists ::= designLists store DUP_TOP ## designLists ::= ## Will need to redo semantic actiion store ::= STORE_FAST store ::= STORE_NAME store ::= STORE_GLOBAL store ::= STORE_DEREF store ::= expr STORE_ATTR store ::= store_subscript store_subscript ::= expr expr STORE_SUBSCR store ::= unpack ''' def parse(p, tokens, customize): p.customize_grammar_rules(tokens, customize) ast = p.parse(tokens) # p.cleanup() return ast def get_python_parser( version, debug_parser=PARSER_DEFAULT_DEBUG, compile_mode='exec', is_pypy = False): """Returns parser object for Python version 2 or 3, 3.2, 3.5on, etc., depending on the parameters passed. *compile_mode* is either 'exec', 'eval', or 'single'. See https://docs.python.org/3.6/library/functions.html#compile for an explanation of the different modes. """ # If version is a string, turn that into the corresponding float. if isinstance(version, str): version = py_str2float(version) # FIXME: there has to be a better way... # We could do this as a table lookup, but that would force us # in import all of the parsers all of the time. Perhaps there is # a lazy way of doing the import? if version < 3.0: if version < 2.2: if version == 1.3: import uncompyle6.parsers.parse13 as parse13 if compile_mode == 'exec': p = parse13.Python14Parser(debug_parser) else: p = parse13.Python14ParserSingle(debug_parser) elif version == 1.4: import uncompyle6.parsers.parse14 as parse14 if compile_mode == 'exec': p = parse14.Python14Parser(debug_parser) else: p = parse14.Python14ParserSingle(debug_parser) elif version == 1.5: import uncompyle6.parsers.parse15 as parse15 if compile_mode == 'exec': p = parse15.Python15Parser(debug_parser) else: p = parse15.Python15ParserSingle(debug_parser) elif version == 2.1: import uncompyle6.parsers.parse21 as parse21 if compile_mode == 'exec': p = parse21.Python21Parser(debug_parser) else: p = parse21.Python21ParserSingle(debug_parser) elif version == 2.2: import uncompyle6.parsers.parse22 as parse22 if compile_mode == 'exec': p = parse22.Python22Parser(debug_parser) else: p = parse22.Python22ParserSingle(debug_parser) elif version == 2.3: import uncompyle6.parsers.parse23 as parse23 if compile_mode == 'exec': p = parse23.Python23Parser(debug_parser) else: p = parse23.Python23ParserSingle(debug_parser) elif version == 2.4: import uncompyle6.parsers.parse24 as parse24 if compile_mode == 'exec': p = parse24.Python24Parser(debug_parser) else: p = parse24.Python24ParserSingle(debug_parser) elif version == 2.5: import uncompyle6.parsers.parse25 as parse25 if compile_mode == 'exec': p = parse25.Python25Parser(debug_parser) else: p = parse25.Python25ParserSingle(debug_parser) elif version == 2.6: import uncompyle6.parsers.parse26 as parse26 if compile_mode == 'exec': p = parse26.Python26Parser(debug_parser) else: p = parse26.Python26ParserSingle(debug_parser) elif version == 2.7: import uncompyle6.parsers.parse27 as parse27 if compile_mode == 'exec': p = parse27.Python27Parser(debug_parser) else: p = parse27.Python27ParserSingle(debug_parser) else: import uncompyle6.parsers.parse2 as parse2 if compile_mode == 'exec': p = parse2.Python2Parser(debug_parser) else: p = parse2.Python2ParserSingle(debug_parser) pass pass pass else: import uncompyle6.parsers.parse3 as parse3 if version == 3.0: import uncompyle6.parsers.parse30 as parse30 if compile_mode == 'exec': p = parse30.Python30Parser(debug_parser) else: p = parse30.Python30ParserSingle(debug_parser) elif version == 3.1: import uncompyle6.parsers.parse31 as parse31 if compile_mode == 'exec': p = parse31.Python31Parser(debug_parser) else: p = parse31.Python31ParserSingle(debug_parser) elif version == 3.2: import uncompyle6.parsers.parse32 as parse32 if compile_mode == 'exec': p = parse32.Python32Parser(debug_parser) else: p = parse32.Python32ParserSingle(debug_parser) elif version == 3.3: import uncompyle6.parsers.parse33 as parse33 if compile_mode == 'exec': p = parse33.Python33Parser(debug_parser) else: p = parse33.Python33ParserSingle(debug_parser) elif version == 3.4: import uncompyle6.parsers.parse34 as parse34 if compile_mode == 'exec': p = parse34.Python34Parser(debug_parser) else: p = parse34.Python34ParserSingle(debug_parser) elif version == 3.5: import uncompyle6.parsers.parse35 as parse35 if compile_mode == 'exec': p = parse35.Python35Parser(debug_parser) else: p = parse35.Python35ParserSingle(debug_parser) elif version == 3.6: import uncompyle6.parsers.parse36 as parse36 if compile_mode == 'exec': p = parse36.Python36Parser(debug_parser) else: p = parse36.Python36ParserSingle(debug_parser) elif version == 3.7: import uncompyle6.parsers.parse37 as parse37 if compile_mode == 'exec': p = parse37.Python37Parser(debug_parser) else: p = parse37.Python37ParserSingle(debug_parser) elif version == 3.8: import uncompyle6.parsers.parse38 as parse38 if compile_mode == 'exec': p = parse38.Python38Parser(debug_parser) else: p = parse38.Python38ParserSingle(debug_parser) else: if compile_mode == 'exec': p = parse3.Python3Parser(debug_parser) else: p = parse3.Python3ParserSingle(debug_parser) p.version = version # p.dump_grammar() # debug return p class PythonParserSingle(PythonParser): def p_call_stmt_single(self, args): ''' # single-mode compilation. Eval-mode interactive compilation # drops the last rule. call_stmt ::= expr PRINT_EXPR ''' def python_parser(version, co, out=sys.stdout, showasm=False, parser_debug=PARSER_DEFAULT_DEBUG, is_pypy=False): """ Parse a code object to an abstract syntax tree representation. :param version: The python version this code is from as a float, for example 2.6, 2.7, 3.2, 3.3, 3.4, 3.5 etc. :param co: The code object to parse. :param out: File like object to write the output to. :param showasm: Flag which determines whether the disassembled and ingested code is written to sys.stdout or not. :param parser_debug: dict containing debug flags for the spark parser. :return: Abstract syntax tree representation of the code object. """ assert iscode(co) from uncompyle6.scanner import get_scanner scanner = get_scanner(version, is_pypy) tokens, customize = scanner.ingest(co) maybe_show_asm(showasm, tokens) # For heavy grammar debugging # parser_debug = {'rules': True, 'transition': True, 'reduce' : True, # 'showstack': 'full'} p = get_python_parser(version, parser_debug) return parse(p, tokens, customize) if __name__ == '__main__': def parse_test(co): from uncompyle6 import PYTHON_VERSION, IS_PYPY ast = python_parser('2.7.13', co, showasm=True, is_pypy=True) ast = python_parser(PYTHON_VERSION, co, showasm=True, is_pypy=IS_PYPY) print(ast) return parse_test(parse_test.__code__)