From ad16ed69ebd3c3f79f9585200adf961ca34ff837 Mon Sep 17 00:00:00 2001
From: rocky
Date: Sun, 4 Feb 2024 14:54:07 -0500
Subject: [PATCH] Go over 2.x grammar testing

---
 test-unit/test_grammar.py     | 74 +++++++++++++++++++++++------------
 uncompyle6/parser.py          | 39 ++++++++++--------
 uncompyle6/parsers/parse27.py |  3 ++
 uncompyle6/parsers/parse3.py  | 71 +++++++++++++++++++++------------
 4 files changed, 122 insertions(+), 65 deletions(-)

diff --git a/test-unit/test_grammar.py b/test-unit/test_grammar.py
index 683d0bcc..7990b415 100644
--- a/test-unit/test_grammar.py
+++ b/test-unit/test_grammar.py
@@ -1,38 +1,53 @@
 import re
 import unittest
+
+from xdis.version_info import IS_PYPY, PYTHON_VERSION_TRIPLE
+
 from uncompyle6.parser import get_python_parser, python_parser
-from xdis.version_info import PYTHON_VERSION_TRIPLE, IS_PYPY
+
 
 class TestGrammar(unittest.TestCase):
     def test_grammar(self):
-
         def check_tokens(tokens, opcode_set):
             remain_tokens = set(tokens) - opcode_set
-            remain_tokens = set([re.sub('_\d+$','', t) for t in remain_tokens])
-            remain_tokens = set([re.sub('_CONT$','', t) for t in remain_tokens])
+            remain_tokens = set([re.sub("_\d+$", "", t) for t in remain_tokens])
+            remain_tokens = set([re.sub("_CONT$", "", t) for t in remain_tokens])
             remain_tokens = set(remain_tokens) - opcode_set
-            self.assertEqual(remain_tokens, set([]),
-                             "Remaining tokens %s\n====\n%s" % (remain_tokens, p.dump_grammar()))
+            self.assertEqual(
+                remain_tokens,
+                set([]),
+                "Remaining tokens %s\n====\n%s" % (remain_tokens, p.dump_grammar()),
+            )
 
         p = get_python_parser(PYTHON_VERSION_TRIPLE, is_pypy=IS_PYPY)
-        (lhs, rhs, tokens,
-         right_recursive, dup_rhs) = p.check_sets()
-        expect_lhs = set(['pos_arg', 'get_iter', 'attribute'])
-        unused_rhs = set(['list', 'call', 'mkfunc',
-                          'mklambda',
-                          'unpack',])
+        (lhs, rhs, tokens, right_recursive, dup_rhs) = p.check_sets()
+        expect_lhs = set(["pos_arg", "get_iter", "attribute"])
+        unused_rhs = set(["list", "call", "mkfunc", "unpack", "lambda_body"])
 
-        expect_right_recursive = frozenset([('designList',
-                                             ('store', 'DUP_TOP', 'designList'))])
-        expect_lhs.add('kwarg')
+        expect_right_recursive = frozenset(
+            [("designList", ("store", "DUP_TOP", "designList"))]
+        )
+        expect_lhs.add("kwarg")
+
+        if PYTHON_VERSION_TRIPLE[:2] == (2, 7):
+            expect_lhs.add("kv3")
+            expect_lhs.add("kvlist")
+            unused_rhs.add("dict")
 
         self.assertEqual(expect_lhs, set(lhs))
         self.assertEqual(unused_rhs, set(rhs))
         self.assertEqual(expect_right_recursive, right_recursive)
 
-        expect_dup_rhs = frozenset([('COME_FROM',), ('CONTINUE',), ('JUMP_ABSOLUTE',),
-                                    ('LOAD_CONST',),
-                                    ('JUMP_BACK',), ('JUMP_FORWARD',)])
+        expect_dup_rhs = frozenset(
+            [
+                ("COME_FROM",),
+                ("CONTINUE",),
+                ("JUMP_ABSOLUTE",),
+                ("LOAD_CONST",),
+                ("JUMP_BACK",),
+                ("JUMP_FORWARD",),
+            ]
+        )
 
         reduced_dup_rhs = {}
         for k in dup_rhs:
@@ -47,10 +62,21 @@ class TestGrammar(unittest.TestCase):
     # FIXME: Something got borked here
     def no_test_dup_rule(self):
         import inspect
-        python_parser(PYTHON_VERSION_TRIPLE, inspect.currentframe().f_code,
-                      is_pypy=IS_PYPY,
-                      parser_debug={
-                          'dups': True, 'transition': False, 'reduce': False,
-                          'rules': False, 'errorstack': None, 'context': True})
-if __name__ == '__main__':
+
+        python_parser(
+            PYTHON_VERSION_TRIPLE,
+            inspect.currentframe().f_code,
+            is_pypy=IS_PYPY,
+            parser_debug={
+                "dups": True,
+                "transition": False,
+                "reduce": False,
+                "rules": False,
+                "errorstack": None,
+                "context": True,
+            },
+        )
+
+
+if __name__ == "__main__":
     unittest.main()
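
Note on the coverage check in test_grammar above: it asserts that, after
stripping customization suffixes, every terminal symbol the grammar mentions
names a real opcode.  A minimal standalone sketch of the same idea (the
opcode names below are illustrative, not taken from the test):

    import re

    def check_tokens(grammar_terminals, opcode_set):
        # Terminals that name an opcode directly are covered.
        remain = set(grammar_terminals) - opcode_set
        # Strip customization suffixes, e.g. CALL_FUNCTION_1 -> CALL_FUNCTION
        # and JUMP_BACK_CONT -> JUMP_BACK.
        remain = {re.sub(r"_\d+$", "", t) for t in remain}
        remain = {re.sub(r"_CONT$", "", t) for t in remain}
        # Whatever is left matches no opcode: a typo or a stale rule.
        return remain - opcode_set

    assert check_tokens({"CALL_FUNCTION_1", "JUMP_BACK"},
                        {"CALL_FUNCTION", "JUMP_BACK"}) == set()
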
diff --git a/uncompyle6/parser.py b/uncompyle6/parser.py
index 1c354d97..2cdb2471 100644
--- a/uncompyle6/parser.py
+++ b/uncompyle6/parser.py
@@ -21,10 +21,11 @@ Common uncompyle6 parser routines.
 
 import sys
 
-from spark_parser import GenericASTBuilder, DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG
-from uncompyle6.show import maybe_show_asm
+from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG, GenericASTBuilder
 from xdis import iscode
 
+from uncompyle6.show import maybe_show_asm
+
 
 class ParserError(Exception):
     def __init__(self, token, offset, debug=PARSER_DEFAULT_DEBUG):
@@ -91,7 +92,14 @@ class PythonParser(GenericASTBuilder):
         # singleton reduction that we can simplify. It also happens to be optional
         # in its other derivation
         self.optional_nt |= frozenset(
-            ("come_froms", "suite_stmts", "l_stmts_opt", "c_stmts_opt", "stmts_opt", "stmt")
+            (
+                "come_froms",
+                "suite_stmts",
+                "l_stmts_opt",
+                "c_stmts_opt",
+                "stmts_opt",
+                "stmt",
+            )
         )
 
         # Reduce singleton reductions in these nonterminals:
@@ -113,10 +121,10 @@ class PythonParser(GenericASTBuilder):
 
     def add_unique_rule(self, rule, opname, arg_count, customize):
         """Add rule to grammar, but only if it hasn't been added previously
-           opname and stack_count are used in the customize() semantic
-           the actions to add the semantic action rule. Stack_count is
-           used in custom opcodes like MAKE_FUNCTION to indicate how
-           many arguments it has. Often it is not used.
+        opname and stack_count are used in the customize() semantic
+        the actions to add the semantic action rule. Stack_count is
+        used in custom opcodes like MAKE_FUNCTION to indicate how
+        many arguments it has. Often it is not used.
         """
         if rule not in self.new_rules:
             # print("XXX ", rule) # debug
@@ -223,7 +231,9 @@
         """
         # Low byte indicates number of positional parameters,
         # high byte number of keyword parameters
-        assert token.kind.startswith("CALL_FUNCTION") or token.kind.startswith("CALL_METHOD")
+        assert token.kind.startswith("CALL_FUNCTION") or token.kind.startswith(
+            "CALL_METHOD"
+        )
         args_pos = token.attr & 0xFF
         args_kw = (token.attr >> 8) & 0xFF
         return args_pos, args_kw
@@ -304,9 +314,6 @@
         c_stmts ::= lastc_stmt
         c_stmts ::= continues
 
-        ending_return ::= RETURN_VALUE RETURN_LAST
-        ending_return ::= RETURN_VALUE_LAMBDA LAMBDA_MARKER
-
         lastc_stmt ::= iflaststmt
         lastc_stmt ::= forelselaststmt
         lastc_stmt ::= ifelsestmtc
@@ -314,9 +321,6 @@
         c_stmts_opt ::= c_stmts
         c_stmts_opt ::= pass
 
-        stmts_opt ::= _stmts
-        stmts_opt ::= pass
-
         # statements inside a loop
         l_stmts ::= _stmts
         l_stmts ::= returns
@@ -907,9 +911,12 @@ def python_parser(
 if __name__ == "__main__":
 
     def parse_test(co):
-        from xdis.version_info import PYTHON_VERSION_TRIPLE, IS_PYPY
+        from xdis.version_info import IS_PYPY, PYTHON_VERSION_TRIPLE
 
-        ast = python_parser(PYTHON_VERSION_TRIPLE[:2], co, showasm=True, is_pypy=IS_PYPY)
+        ast = python_parser(
+            PYTHON_VERSION_TRIPLE[:2], co, showasm=True, is_pypy=IS_PYPY
+        )
         print(ast)
         return
+
     parse_test(parse_test.func_code)
diff --git a/uncompyle6/parsers/parse27.py b/uncompyle6/parsers/parse27.py
index 88c0482f..3518e45b 100644
--- a/uncompyle6/parsers/parse27.py
+++ b/uncompyle6/parsers/parse27.py
@@ -38,6 +38,9 @@ class Python27Parser(Python2Parser):
 
         stmt ::= dict_comp_func
 
+        ending_return ::= RETURN_VALUE RETURN_LAST
+        ending_return ::= RETURN_VALUE_LAMBDA LAMBDA_MARKER
+
         dict_comp_func ::= BUILD_MAP_0 LOAD_FAST FOR_ITER store
                            comp_iter JUMP_BACK ending_return
 
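
Note on the ending_return move: the two productions deleted from the common
grammar in uncompyle6/parser.py reappear above in parse27.py and below in
parse3.py, so only the version-specific parsers that use them carry them.
In these SPARK-based parsers, any p_* method contributes the grammar
productions written in its docstring; a stripped-down sketch of that
convention (class and method names here are invented, constructor details
omitted):

    from spark_parser import GenericASTBuilder

    class TinyParser(GenericASTBuilder):
        def p_ending_return(self, args):
            """
            ending_return ::= RETURN_VALUE RETURN_LAST
            ending_return ::= RETURN_VALUE_LAMBDA LAMBDA_MARKER
            """
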
diff --git a/uncompyle6/parsers/parse3.py b/uncompyle6/parsers/parse3.py
index 7019dbac..667010f4 100644
--- a/uncompyle6/parsers/parse3.py
+++ b/uncompyle6/parsers/parse3.py
@@ -27,22 +27,24 @@ that a later phase can turn into a sequence of ASCII text.
 """
 
 import re
-from uncompyle6.scanners.tok import Token
+
+from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG
+
 from uncompyle6.parser import PythonParser, PythonParserSingle, nop_func
 from uncompyle6.parsers.reducecheck import (
     and_invalid,
     except_handler_else,
     ifelsestmt,
-    ifstmt,
     iflaststmt,
+    ifstmt,
     or_check,
     testtrue,
     tryelsestmtl3,
     tryexcept,
-    while1stmt
+    while1stmt,
 )
 from uncompyle6.parsers.treenode import SyntaxTree
-from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG
+from uncompyle6.scanners.tok import Token
 
 
 class Python3Parser(PythonParser):
@@ -79,6 +81,9 @@ class Python3Parser(PythonParser):
 
         stmt ::= set_comp_func
 
+        ending_return ::= RETURN_VALUE RETURN_LAST
+        ending_return ::= RETURN_VALUE_LAMBDA LAMBDA_MARKER
+
         # TODO this can be simplified
         set_comp_func ::= BUILD_SET_0 LOAD_ARG FOR_ITER store comp_iter
                           JUMP_BACK ending_return
@@ -98,7 +103,7 @@ class Python3Parser(PythonParser):
         """
 
     def p_dict_comp3(self, args):
-        """"
+        """ "
         expr ::= dict_comp
         stmt ::= dict_comp_func
         dict_comp_func ::= BUILD_MAP_0 LOAD_ARG FOR_ITER store
@@ -519,7 +524,7 @@ class Python3Parser(PythonParser):
               expr
               call
              CALL_FUNCTION_3
-        """
+        """
        # FIXME: I bet this can be simplified
        # look for next MAKE_FUNCTION
        j = i
@@ -627,7 +632,11 @@ class Python3Parser(PythonParser):
         self.add_unique_rule(rule, token.kind, uniq_param, customize)
 
         if "LOAD_BUILD_CLASS" in self.seen_ops:
-            if next_token == "CALL_FUNCTION" and next_token.attr == 1 and pos_args_count > 1:
+            if (
+                next_token == "CALL_FUNCTION"
+                and next_token.attr == 1
+                and pos_args_count > 1
+            ):
                 rule = "classdefdeco2 ::= LOAD_BUILD_CLASS mkfunc %s%s_%d" % (
                     ("expr " * (pos_args_count - 1)),
                     opname,
@@ -766,18 +775,24 @@ class Python3Parser(PythonParser):
 
         elif opname in ("BUILD_CONST_LIST", "BUILD_CONST_DICT", "BUILD_CONST_SET"):
             if opname == "BUILD_CONST_DICT":
-                rule = """
+                rule = (
+                    """
                    add_consts ::= ADD_VALUE*
                    const_list ::= COLLECTION_START add_consts %s
                    dict ::= const_list
                    expr ::= dict
-                """ % opname
+                """
+                    % opname
+                )
             else:
-                rule = """
+                rule = (
+                    """
                    add_consts ::= ADD_VALUE*
                    const_list ::= COLLECTION_START add_consts %s
                    expr ::= const_list
-                """ % opname
+                """
+                    % opname
+                )
             self.addRule(rule, nop_func)
 
         elif opname.startswith("BUILD_DICT_OLDER"):
@@ -932,7 +947,6 @@ class Python3Parser(PythonParser):
                 "CALL_FUNCTION_VAR_KW",
             )
         ) or opname.startswith("CALL_FUNCTION_KW"):
-
             if opname == "CALL_FUNCTION" and token.attr == 1:
                 rule = """
                     dict_comp ::= LOAD_DICTCOMP LOAD_STR MAKE_FUNCTION_0 expr
@@ -1108,7 +1122,8 @@ class Python3Parser(PythonParser):
             if has_get_iter_call_function1:
                 rule_pat = (
                     "generator_exp ::= %sload_closure load_genexpr %%s%s expr "
-                    "GET_ITER CALL_FUNCTION_1" % ("pos_arg " * pos_args_count, opname)
+                    "GET_ITER CALL_FUNCTION_1"
+                    % ("pos_arg " * pos_args_count, opname)
                 )
                 self.add_make_function_rule(rule_pat, opname, token.attr, customize)
 
@@ -1194,7 +1209,6 @@ class Python3Parser(PythonParser):
                 )
                 self.add_unique_rule(rule, opname, token.attr, customize)
 
-
             if self.version >= (3, 4):
                 if not self.is_pypy:
                     load_op = "LOAD_STR"
@@ -1278,14 +1292,16 @@ class Python3Parser(PythonParser):
                 if has_get_iter_call_function1:
                     rule_pat = (
                         "generator_exp ::= %sload_genexpr %%s%s expr "
-                        "GET_ITER CALL_FUNCTION_1" % ("pos_arg " * pos_args_count, opname)
+                        "GET_ITER CALL_FUNCTION_1"
+                        % ("pos_arg " * pos_args_count, opname)
                     )
                     self.add_make_function_rule(
                         rule_pat, opname, token.attr, customize
                     )
                     rule_pat = (
                         "generator_exp ::= %sload_closure load_genexpr %%s%s expr "
-                        "GET_ITER CALL_FUNCTION_1" % ("pos_arg " * pos_args_count, opname)
+                        "GET_ITER CALL_FUNCTION_1"
+                        % ("pos_arg " * pos_args_count, opname)
                     )
                     self.add_make_function_rule(
                         rule_pat, opname, token.attr, customize
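
Note on the rule_pat strings in the surrounding hunks: they are two-stage
format templates.  The % substitution runs immediately, while the escaped
%%s survives as a literal %s placeholder for add_make_function_rule to fill
in later.  A worked example with illustrative values:

    pos_args_count = 2
    opname = "MAKE_FUNCTION_0"
    rule_pat = (
        "generator_exp ::= %sload_genexpr %%s%s expr "
        "GET_ITER CALL_FUNCTION_1"
        % ("pos_arg " * pos_args_count, opname)
    )
    # %s -> "pos_arg pos_arg ", %%s -> "%s", and the last %s -> opname:
    assert rule_pat == (
        "generator_exp ::= pos_arg pos_arg load_genexpr "
        "%sMAKE_FUNCTION_0 expr GET_ITER CALL_FUNCTION_1"
    )
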
CALL_FUNCTION_1" + % ("pos_arg " * pos_args_count, opname) ) self.add_make_function_rule( rule_pat, opname, token.attr, customize ) rule_pat = ( "generator_exp ::= %sload_closure load_genexpr %%s%s expr " - "GET_ITER CALL_FUNCTION_1" % ("pos_arg " * pos_args_count, opname) + "GET_ITER CALL_FUNCTION_1" + % ("pos_arg " * pos_args_count, opname) ) self.add_make_function_rule( rule_pat, opname, token.attr, customize @@ -1337,7 +1353,8 @@ class Python3Parser(PythonParser): if has_get_iter_call_function1: rule_pat = ( "generator_exp ::= %sload_genexpr %%s%s expr " - "GET_ITER CALL_FUNCTION_1" % ("pos_arg " * pos_args_count, opname) + "GET_ITER CALL_FUNCTION_1" + % ("pos_arg " * pos_args_count, opname) ) self.add_make_function_rule(rule_pat, opname, token.attr, customize) @@ -1349,7 +1366,8 @@ class Python3Parser(PythonParser): # Todo: For Pypy we need to modify this slightly rule_pat = ( "listcomp ::= %sLOAD_LISTCOMP %%s%s expr " - "GET_ITER CALL_FUNCTION_1" % ("expr " * pos_args_count, opname) + "GET_ITER CALL_FUNCTION_1" + % ("expr " * pos_args_count, opname) ) self.add_make_function_rule( rule_pat, opname, token.attr, customize @@ -1580,7 +1598,7 @@ class Python3Parser(PythonParser): } if self.version == (3, 6): - self.reduce_check_table["and"] = and_invalid + self.reduce_check_table["and"] = and_invalid self.check_reduce["and"] = "AST" self.check_reduce["annotate_tuple"] = "noAST" @@ -1610,7 +1628,7 @@ class Python3Parser(PythonParser): def reduce_is_invalid(self, rule, ast, tokens, first, last): lhs = rule[0] n = len(tokens) - last = min(last, n-1) + last = min(last, n - 1) fn = self.reduce_check_table.get(lhs, None) if fn: if fn(self, lhs, n, rule, ast, tokens, first, last): @@ -1636,13 +1654,18 @@ class Python3Parser(PythonParser): condition_jump2 = tokens[min(last - 1, len(tokens) - 1)] # If there are two *distinct* condition jumps, they should not jump to the # same place. Otherwise we have some sort of "and"/"or". - if condition_jump2.kind.startswith("POP_JUMP_IF") and condition_jump != condition_jump2: + if ( + condition_jump2.kind.startswith("POP_JUMP_IF") + and condition_jump != condition_jump2 + ): return condition_jump.attr == condition_jump2.attr - if tokens[last] == "COME_FROM" and tokens[last].off2int() != condition_jump.attr: + if ( + tokens[last] == "COME_FROM" + and tokens[last].off2int() != condition_jump.attr + ): return False - # if condition_jump.attr < condition_jump2.off2int(): # print("XXX", first, last) # for t in range(first, last): print(tokens[t]) @@ -1664,7 +1687,6 @@ class Python3Parser(PythonParser): < tokens[last].off2int() ) elif lhs == "while1stmt": - if while1stmt(self, lhs, n, rule, ast, tokens, first, last): return True @@ -1686,7 +1708,6 @@ class Python3Parser(PythonParser): return True return False elif lhs == "while1elsestmt": - n = len(tokens) if last == n: # Adjust for fuzziness in parsing