withasstmt -> with_as

This matches Python's AST name better. Some linting and
sorting of dictionary keys were done as well.
This commit is contained in:
rocky
2024-03-08 04:09:47 -05:00
parent 3724e02183
commit 156188f8bb
13 changed files with 356 additions and 257 deletions

View File

@@ -1,4 +1,4 @@
# Copyright (c) 2016-2020, 2022-2023 Rocky Bernstein
# Copyright (c) 2016-2020, 2022-2024 Rocky Bernstein
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -17,24 +17,25 @@ spark grammar differences over Python 3.5 for Python 3.6.
"""
from __future__ import print_function
from uncompyle6.parser import PythonParserSingle, nop_func
from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG
from uncompyle6.parser import PythonParserSingle, nop_func
from uncompyle6.parsers.parse35 import Python35Parser
from uncompyle6.scanners.tok import Token
class Python36Parser(Python35Parser):
class Python36Parser(Python35Parser):
def __init__(self, debug_parser=PARSER_DEFAULT_DEBUG):
super(Python36Parser, self).__init__(debug_parser)
self.customized = {}
def p_36_jump(self, args):
"""
# Zero or one COME_FROM
# And/or expressions have this
come_from_opt ::= COME_FROM?
"""
def p_36_misc(self, args):
"""sstmt ::= sstmt RETURN_LAST
@@ -207,7 +208,8 @@ class Python36Parser(Python35Parser):
# self.remove_rules("""
# """)
super(Python36Parser, self).customize_grammar_rules(tokens, customize)
self.remove_rules("""
self.remove_rules(
"""
_ifstmts_jumpl ::= c_stmts_opt
_ifstmts_jumpl ::= _ifstmts_jump
except_handler ::= JUMP_FORWARD COME_FROM_EXCEPT except_stmts END_FINALLY COME_FROM
@@ -234,7 +236,8 @@ class Python36Parser(Python35Parser):
for_block pb_ja
else_suite COME_FROM_LOOP
""")
"""
)
self.check_reduce["call_kw"] = "AST"
# Opcode names in the custom_ops_processed set have rules that get added
@@ -247,24 +250,23 @@ class Python36Parser(Python35Parser):
# the start.
custom_ops_processed = set()
for i, token in enumerate(tokens):
opname = token.kind
if opname == 'FORMAT_VALUE':
if opname == "FORMAT_VALUE":
rules_str = """
expr ::= formatted_value1
formatted_value1 ::= expr FORMAT_VALUE
"""
self.add_unique_doc_rules(rules_str, customize)
elif opname == 'FORMAT_VALUE_ATTR':
elif opname == "FORMAT_VALUE_ATTR":
rules_str = """
expr ::= formatted_value2
formatted_value2 ::= expr expr FORMAT_VALUE_ATTR
"""
self.add_unique_doc_rules(rules_str, customize)
elif opname == 'MAKE_FUNCTION_CLOSURE':
if 'LOAD_DICTCOMP' in self.seen_ops:
elif opname == "MAKE_FUNCTION_CLOSURE":
if "LOAD_DICTCOMP" in self.seen_ops:
# Is there something general going on here?
rule = """
dict_comp ::= load_closure LOAD_DICTCOMP LOAD_STR
@@ -272,7 +274,7 @@ class Python36Parser(Python35Parser):
GET_ITER CALL_FUNCTION_1
"""
self.addRule(rule, nop_func)
elif 'LOAD_SETCOMP' in self.seen_ops:
elif "LOAD_SETCOMP" in self.seen_ops:
rule = """
set_comp ::= load_closure LOAD_SETCOMP LOAD_STR
MAKE_FUNCTION_CLOSURE expr
@@ -280,7 +282,7 @@ class Python36Parser(Python35Parser):
"""
self.addRule(rule, nop_func)
elif opname == 'BEFORE_ASYNC_WITH':
elif opname == "BEFORE_ASYNC_WITH":
rules_str = """
stmt ::= async_with_stmt
async_with_pre ::= BEFORE_ASYNC_WITH GET_AWAITABLE LOAD_CONST YIELD_FROM SETUP_ASYNC_WITH
@@ -306,30 +308,37 @@ class Python36Parser(Python35Parser):
"""
self.addRule(rules_str, nop_func)
elif opname.startswith('BUILD_STRING'):
elif opname.startswith("BUILD_STRING"):
v = token.attr
rules_str = """
expr ::= joined_str
joined_str ::= %sBUILD_STRING_%d
""" % ("expr " * v, v)
""" % (
"expr " * v,
v,
)
self.add_unique_doc_rules(rules_str, customize)
if 'FORMAT_VALUE_ATTR' in self.seen_ops:
if "FORMAT_VALUE_ATTR" in self.seen_ops:
rules_str = """
formatted_value_attr ::= expr expr FORMAT_VALUE_ATTR expr BUILD_STRING
expr ::= formatted_value_attr
"""
self.add_unique_doc_rules(rules_str, customize)
elif opname.startswith('BUILD_MAP_UNPACK_WITH_CALL'):
elif opname.startswith("BUILD_MAP_UNPACK_WITH_CALL"):
v = token.attr
rule = 'build_map_unpack_with_call ::= %s%s' % ('expr ' * v, opname)
rule = "build_map_unpack_with_call ::= %s%s" % ("expr " * v, opname)
self.addRule(rule, nop_func)
elif opname.startswith('BUILD_TUPLE_UNPACK_WITH_CALL'):
elif opname.startswith("BUILD_TUPLE_UNPACK_WITH_CALL"):
v = token.attr
rule = ('build_tuple_unpack_with_call ::= ' + 'expr1024 ' * int(v//1024) +
'expr32 ' * int((v//32) % 32) +
'expr ' * (v % 32) + opname)
rule = (
"build_tuple_unpack_with_call ::= "
+ "expr1024 " * int(v // 1024)
+ "expr32 " * int((v // 32) % 32)
+ "expr " * (v % 32)
+ opname
)
self.addRule(rule, nop_func)
rule = ('starred ::= %s %s' % ('expr ' * v, opname))
rule = "starred ::= %s %s" % ("expr " * v, opname)
self.addRule(rule, nop_func)
elif opname == "GET_AITER":
self.addRule(
@@ -475,7 +484,6 @@ class Python36Parser(Python35Parser):
)
custom_ops_processed.add(opname)
elif opname == "GET_ANEXT":
self.addRule(
"""
@@ -500,7 +508,7 @@ class Python36Parser(Python35Parser):
)
custom_ops_processed.add(opname)
elif opname == 'SETUP_ANNOTATIONS':
elif opname == "SETUP_ANNOTATIONS":
# 3.6 Variable Annotations PEP 526
# This seems to come before STORE_ANNOTATION, and doesn't
# correspond to direct Python source code.
@@ -516,7 +524,7 @@ class Python36Parser(Python35Parser):
"""
self.addRule(rule, nop_func)
# Check to combine assignment + annotation into one statement
self.check_reduce['assign'] = 'token'
self.check_reduce["assign"] = "token"
elif opname == "WITH_CLEANUP_START":
rules_str = """
stmt ::= with_null
@@ -524,13 +532,13 @@ class Python36Parser(Python35Parser):
with_suffix ::= WITH_CLEANUP_START WITH_CLEANUP_FINISH END_FINALLY
"""
self.addRule(rules_str, nop_func)
elif opname == 'SETUP_WITH':
elif opname == "SETUP_WITH":
rules_str = """
with ::= expr SETUP_WITH POP_TOP suite_stmts_opt COME_FROM_WITH
with_suffix
# Removes POP_BLOCK LOAD_CONST from 3.6-
withasstmt ::= expr SETUP_WITH store suite_stmts_opt COME_FROM_WITH
with_as ::= expr SETUP_WITH store suite_stmts_opt COME_FROM_WITH
with_suffix
with ::= expr SETUP_WITH POP_TOP suite_stmts_opt POP_BLOCK
BEGIN_FINALLY COME_FROM_WITH
@@ -542,7 +550,6 @@ class Python36Parser(Python35Parser):
return
def custom_classfunc_rule(self, opname, token, customize, next_token, is_pypy):
args_pos, args_kw = self.get_pos_kw(token)
# Additional exprs for * and ** args:
@@ -550,140 +557,186 @@ class Python36Parser(Python35Parser):
# 1 for CALL_FUNCTION_VAR or CALL_FUNCTION_KW
# 2 for * and ** args (CALL_FUNCTION_VAR_KW).
# Yes, this computation based on instruction name is a little bit hoaky.
nak = ( len(opname)-len('CALL_FUNCTION') ) // 3
nak = (len(opname) - len("CALL_FUNCTION")) // 3
uniq_param = args_kw + args_pos
if frozenset(('GET_AWAITABLE', 'YIELD_FROM')).issubset(self.seen_ops):
rule = ('async_call ::= expr ' +
('pos_arg ' * args_pos) +
('kwarg ' * args_kw) +
'expr ' * nak + token.kind +
' GET_AWAITABLE LOAD_CONST YIELD_FROM')
if frozenset(("GET_AWAITABLE", "YIELD_FROM")).issubset(self.seen_ops):
rule = (
"async_call ::= expr "
+ ("pos_arg " * args_pos)
+ ("kwarg " * args_kw)
+ "expr " * nak
+ token.kind
+ " GET_AWAITABLE LOAD_CONST YIELD_FROM"
)
self.add_unique_rule(rule, token.kind, uniq_param, customize)
self.add_unique_rule('expr ::= async_call', token.kind, uniq_param, customize)
self.add_unique_rule(
"expr ::= async_call", token.kind, uniq_param, customize
)
if opname.startswith('CALL_FUNCTION_KW'):
if opname.startswith("CALL_FUNCTION_KW"):
if is_pypy:
# PYPY doesn't follow CPython 3.6 CALL_FUNCTION_KW conventions
super(Python36Parser, self).custom_classfunc_rule(opname, token, customize, next_token, is_pypy)
super(Python36Parser, self).custom_classfunc_rule(
opname, token, customize, next_token, is_pypy
)
else:
self.addRule("expr ::= call_kw36", nop_func)
values = 'expr ' * token.attr
rule = "call_kw36 ::= expr {values} LOAD_CONST {opname}".format(**locals())
values = "expr " * token.attr
rule = "call_kw36 ::= expr {values} LOAD_CONST {opname}".format(
**locals()
)
self.add_unique_rule(rule, token.kind, token.attr, customize)
elif opname == 'CALL_FUNCTION_EX_KW':
elif opname == "CALL_FUNCTION_EX_KW":
# Note: this doesn't exist in 3.7 and later
self.addRule("""expr ::= call_ex_kw4
self.addRule(
"""expr ::= call_ex_kw4
call_ex_kw4 ::= expr
expr
expr
CALL_FUNCTION_EX_KW
""",
nop_func)
if 'BUILD_MAP_UNPACK_WITH_CALL' in self.seen_op_basenames:
self.addRule("""expr ::= call_ex_kw
nop_func,
)
if "BUILD_MAP_UNPACK_WITH_CALL" in self.seen_op_basenames:
self.addRule(
"""expr ::= call_ex_kw
call_ex_kw ::= expr expr build_map_unpack_with_call
CALL_FUNCTION_EX_KW
""", nop_func)
if 'BUILD_TUPLE_UNPACK_WITH_CALL' in self.seen_op_basenames:
""",
nop_func,
)
if "BUILD_TUPLE_UNPACK_WITH_CALL" in self.seen_op_basenames:
# FIXME: should this be parameterized by EX value?
self.addRule("""expr ::= call_ex_kw3
self.addRule(
"""expr ::= call_ex_kw3
call_ex_kw3 ::= expr
build_tuple_unpack_with_call
expr
CALL_FUNCTION_EX_KW
""", nop_func)
if 'BUILD_MAP_UNPACK_WITH_CALL' in self.seen_op_basenames:
""",
nop_func,
)
if "BUILD_MAP_UNPACK_WITH_CALL" in self.seen_op_basenames:
# FIXME: should this be parameterized by EX value?
self.addRule("""expr ::= call_ex_kw2
self.addRule(
"""expr ::= call_ex_kw2
call_ex_kw2 ::= expr
build_tuple_unpack_with_call
build_map_unpack_with_call
CALL_FUNCTION_EX_KW
""", nop_func)
""",
nop_func,
)
elif opname == 'CALL_FUNCTION_EX':
self.addRule("""
elif opname == "CALL_FUNCTION_EX":
self.addRule(
"""
expr ::= call_ex
starred ::= expr
call_ex ::= expr starred CALL_FUNCTION_EX
""", nop_func)
""",
nop_func,
)
if self.version >= (3, 6):
if 'BUILD_MAP_UNPACK_WITH_CALL' in self.seen_ops:
self.addRule("""
if "BUILD_MAP_UNPACK_WITH_CALL" in self.seen_ops:
self.addRule(
"""
expr ::= call_ex_kw
call_ex_kw ::= expr expr
build_map_unpack_with_call CALL_FUNCTION_EX
""", nop_func)
if 'BUILD_TUPLE_UNPACK_WITH_CALL' in self.seen_ops:
self.addRule("""
""",
nop_func,
)
if "BUILD_TUPLE_UNPACK_WITH_CALL" in self.seen_ops:
self.addRule(
"""
expr ::= call_ex_kw3
call_ex_kw3 ::= expr
build_tuple_unpack_with_call
%s
CALL_FUNCTION_EX
""" % 'expr ' * token.attr, nop_func)
"""
% "expr "
* token.attr,
nop_func,
)
pass
# FIXME: Is this right?
self.addRule("""
self.addRule(
"""
expr ::= call_ex_kw4
call_ex_kw4 ::= expr
expr
expr
CALL_FUNCTION_EX
""", nop_func)
""",
nop_func,
)
pass
else:
super(Python36Parser, self).custom_classfunc_rule(opname, token, customize, next_token, is_pypy)
super(Python36Parser, self).custom_classfunc_rule(
opname, token, customize, next_token, is_pypy
)
def reduce_is_invalid(self, rule, ast, tokens, first, last):
invalid = super(Python36Parser,
self).reduce_is_invalid(rule, ast,
tokens, first, last)
invalid = super(Python36Parser, self).reduce_is_invalid(
rule, ast, tokens, first, last
)
if invalid:
return invalid
if rule[0] == 'assign':
if rule[0] == "assign":
# Try to combine assignment + annotation into one statement
if (len(tokens) >= last + 1 and
tokens[last] == 'LOAD_NAME' and
tokens[last+1] == 'STORE_ANNOTATION' and
tokens[last-1].pattr == tokens[last+1].pattr):
if (
len(tokens) >= last + 1
and tokens[last] == "LOAD_NAME"
and tokens[last + 1] == "STORE_ANNOTATION"
and tokens[last - 1].pattr == tokens[last + 1].pattr
):
# Will handle as ann_assign_init_value
return True
pass
if rule[0] == 'call_kw':
if rule[0] == "call_kw":
# Make sure we don't derive call_kw
nt = ast[0]
while not isinstance(nt, Token):
if nt[0] == 'call_kw':
if nt[0] == "call_kw":
return True
nt = nt[0]
pass
pass
return False
class Python36ParserSingle(Python36Parser, PythonParserSingle):
pass
if __name__ == '__main__':
if __name__ == "__main__":
# Check grammar
p = Python36Parser()
p.check_grammar()
from xdis.version_info import PYTHON_VERSION_TRIPLE, IS_PYPY
from xdis.version_info import IS_PYPY, PYTHON_VERSION_TRIPLE
if PYTHON_VERSION_TRIPLE[:2] == (3, 6):
lhs, rhs, tokens, right_recursive, dup_rhs = p.check_sets()
from uncompyle6.scanner import get_scanner
s = get_scanner(PYTHON_VERSION_TRIPLE, IS_PYPY)
opcode_set = set(s.opc.opname).union(set(
"""JUMP_BACK CONTINUE RETURN_END_IF COME_FROM
opcode_set = set(s.opc.opname).union(
set(
"""JUMP_BACK CONTINUE RETURN_END_IF COME_FROM
LOAD_GENEXPR LOAD_ASSERT LOAD_SETCOMP LOAD_DICTCOMP LOAD_CLASSNAME
LAMBDA_MARKER RETURN_LAST
""".split()))
""".split()
)
)
remain_tokens = set(tokens) - opcode_set
import re
remain_tokens = set([re.sub(r'_\d+$', '', t) for t in remain_tokens])
remain_tokens = set([re.sub('_CONT$', '', t) for t in remain_tokens])
remain_tokens = set([re.sub(r"_\d+$", "", t) for t in remain_tokens])
remain_tokens = set([re.sub("_CONT$", "", t) for t in remain_tokens])
remain_tokens = set(remain_tokens) - opcode_set
print(remain_tokens)
# print(sorted(p.rule2name.items()))