From ad16ed69ebd3c3f79f9585200adf961ca34ff837 Mon Sep 17 00:00:00 2001
From: rocky
Date: Sun, 4 Feb 2024 14:54:07 -0500
Subject: [PATCH] Go over 2.x grammar testing

---
 test-unit/test_grammar.py     | 74 +++++++++++++++++++++++------------
 uncompyle6/parser.py          | 39 ++++++++++--------
 uncompyle6/parsers/parse27.py |  3 ++
 uncompyle6/parsers/parse3.py  | 71 +++++++++++++++++++++------------
 4 files changed, 122 insertions(+), 65 deletions(-)

diff --git a/test-unit/test_grammar.py b/test-unit/test_grammar.py
index 683d0bcc..7990b415 100644
--- a/test-unit/test_grammar.py
+++ b/test-unit/test_grammar.py
@@ -1,38 +1,53 @@
 import re
 import unittest
+
+from xdis.version_info import IS_PYPY, PYTHON_VERSION_TRIPLE
+
 from uncompyle6.parser import get_python_parser, python_parser
-from xdis.version_info import PYTHON_VERSION_TRIPLE, IS_PYPY
+
 
 class TestGrammar(unittest.TestCase):
     def test_grammar(self):
-
         def check_tokens(tokens, opcode_set):
             remain_tokens = set(tokens) - opcode_set
-            remain_tokens = set([re.sub('_\d+$','', t) for t in remain_tokens])
-            remain_tokens = set([re.sub('_CONT$','', t) for t in remain_tokens])
+            remain_tokens = set([re.sub("_\d+$", "", t) for t in remain_tokens])
+            remain_tokens = set([re.sub("_CONT$", "", t) for t in remain_tokens])
             remain_tokens = set(remain_tokens) - opcode_set
-            self.assertEqual(remain_tokens, set([]),
-                             "Remaining tokens %s\n====\n%s" % (remain_tokens, p.dump_grammar()))
+            self.assertEqual(
+                remain_tokens,
+                set([]),
+                "Remaining tokens %s\n====\n%s" % (remain_tokens, p.dump_grammar()),
+            )
 
         p = get_python_parser(PYTHON_VERSION_TRIPLE, is_pypy=IS_PYPY)
-        (lhs, rhs, tokens,
-         right_recursive, dup_rhs) = p.check_sets()
-        expect_lhs = set(['pos_arg', 'get_iter', 'attribute'])
-        unused_rhs = set(['list', 'call', 'mkfunc',
-                          'mklambda',
-                          'unpack',])
+        (lhs, rhs, tokens, right_recursive, dup_rhs) = p.check_sets()
+        expect_lhs = set(["pos_arg", "get_iter", "attribute"])
+        unused_rhs = set(["list", "call", "mkfunc", "unpack", "lambda_body"])
 
-        expect_right_recursive = frozenset([('designList',
-                                             ('store', 'DUP_TOP', 'designList'))])
-        expect_lhs.add('kwarg')
+        expect_right_recursive = frozenset(
+            [("designList", ("store", "DUP_TOP", "designList"))]
+        )
+        expect_lhs.add("kwarg")
+
+        if PYTHON_VERSION_TRIPLE[:2] == (2, 7):
+            expect_lhs.add("kv3")
+            expect_lhs.add("kvlist")
+            unused_rhs.add("dict")
 
         self.assertEqual(expect_lhs, set(lhs))
         self.assertEqual(unused_rhs, set(rhs))
         self.assertEqual(expect_right_recursive, right_recursive)
 
-        expect_dup_rhs = frozenset([('COME_FROM',), ('CONTINUE',), ('JUMP_ABSOLUTE',),
-                                    ('LOAD_CONST',),
-                                    ('JUMP_BACK',), ('JUMP_FORWARD',)])
+        expect_dup_rhs = frozenset(
+            [
+                ("COME_FROM",),
+                ("CONTINUE",),
+                ("JUMP_ABSOLUTE",),
+                ("LOAD_CONST",),
+                ("JUMP_BACK",),
+                ("JUMP_FORWARD",),
+            ]
+        )
 
         reduced_dup_rhs = {}
         for k in dup_rhs:
@@ -47,10 +62,21 @@ class TestGrammar(unittest.TestCase):
     # FIXME: Something got borked here
     def no_test_dup_rule(self):
         import inspect
-        python_parser(PYTHON_VERSION_TRIPLE, inspect.currentframe().f_code,
-                      is_pypy=IS_PYPY,
-                      parser_debug={
-                          'dups': True, 'transition': False, 'reduce': False,
-                          'rules': False, 'errorstack': None, 'context': True})
-if __name__ == '__main__':
+
+        python_parser(
+            PYTHON_VERSION_TRIPLE,
+            inspect.currentframe().f_code,
+            is_pypy=IS_PYPY,
+            parser_debug={
+                "dups": True,
+                "transition": False,
+                "reduce": False,
+                "rules": False,
+                "errorstack": None,
+                "context": True,
+            },
+        )
+
+
+if __name__ == "__main__":
     unittest.main()
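
Note on the coverage check in test_grammar above: it asserts that, after
stripping customization suffixes, every terminal symbol the grammar mentions
names a real opcode.  A minimal standalone sketch of the same idea (the
opcode names below are illustrative, not taken from the test):

    import re

    def check_tokens(grammar_terminals, opcode_set):
        # Terminals that name an opcode directly are covered.
        remain = set(grammar_terminals) - opcode_set
        # Strip customization suffixes, e.g. CALL_FUNCTION_1 -> CALL_FUNCTION
        # and JUMP_BACK_CONT -> JUMP_BACK.
        remain = {re.sub(r"_\d+$", "", t) for t in remain}
        remain = {re.sub(r"_CONT$", "", t) for t in remain}
        # Whatever is left matches no opcode: a typo or a stale rule.
        return remain - opcode_set

    assert check_tokens({"CALL_FUNCTION_1", "JUMP_BACK"},
                        {"CALL_FUNCTION", "JUMP_BACK"}) == set()
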
diff --git a/uncompyle6/parser.py b/uncompyle6/parser.py
index 1c354d97..2cdb2471 100644
--- a/uncompyle6/parser.py
+++ b/uncompyle6/parser.py
@@ -21,10 +21,11 @@ Common uncompyle6 parser routines.
 
 import sys
 
-from spark_parser import GenericASTBuilder, DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG
-from uncompyle6.show import maybe_show_asm
+from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG, GenericASTBuilder
 from xdis import iscode
 
+from uncompyle6.show import maybe_show_asm
+
 
 class ParserError(Exception):
     def __init__(self, token, offset, debug=PARSER_DEFAULT_DEBUG):
@@ -91,7 +92,14 @@ class PythonParser(GenericASTBuilder):
         # singleton reduction that we can simplify. It also happens to be optional
         # in its other derivation
         self.optional_nt |= frozenset(
-            ("come_froms", "suite_stmts", "l_stmts_opt", "c_stmts_opt", "stmts_opt", "stmt")
+            (
+                "come_froms",
+                "suite_stmts",
+                "l_stmts_opt",
+                "c_stmts_opt",
+                "stmts_opt",
+                "stmt",
+            )
         )
 
         # Reduce singleton reductions in these nonterminals:
@@ -113,10 +121,10 @@ class PythonParser(GenericASTBuilder):
 
     def add_unique_rule(self, rule, opname, arg_count, customize):
         """Add rule to grammar, but only if it hasn't been added previously
-           opname and stack_count are used in the customize() semantic
-           the actions to add the semantic action rule. Stack_count is
-           used in custom opcodes like MAKE_FUNCTION to indicate how
-           many arguments it has. Often it is not used.
+        opname and stack_count are used in the customize() semantic
+        the actions to add the semantic action rule. Stack_count is
+        used in custom opcodes like MAKE_FUNCTION to indicate how
+        many arguments it has. Often it is not used.
         """
         if rule not in self.new_rules:
             # print("XXX ", rule) # debug
@@ -223,7 +231,9 @@
         """
         # Low byte indicates number of positional parameters,
         # high byte number of keyword parameters
-        assert token.kind.startswith("CALL_FUNCTION") or token.kind.startswith("CALL_METHOD")
+        assert token.kind.startswith("CALL_FUNCTION") or token.kind.startswith(
+            "CALL_METHOD"
+        )
         args_pos = token.attr & 0xFF
         args_kw = (token.attr >> 8) & 0xFF
         return args_pos, args_kw
@@ -304,9 +314,6 @@
         c_stmts ::= lastc_stmt
         c_stmts ::= continues
 
-        ending_return ::= RETURN_VALUE RETURN_LAST
-        ending_return ::= RETURN_VALUE_LAMBDA LAMBDA_MARKER
-
         lastc_stmt ::= iflaststmt
         lastc_stmt ::= forelselaststmt
         lastc_stmt ::= ifelsestmtc
@@ -314,9 +321,6 @@
         c_stmts_opt ::= c_stmts
         c_stmts_opt ::= pass
 
-        stmts_opt ::= _stmts
-        stmts_opt ::= pass
-
         # statements inside a loop
         l_stmts ::= _stmts
         l_stmts ::= returns
@@ -907,9 +911,12 @@ def python_parser(
 if __name__ == "__main__":
 
     def parse_test(co):
-        from xdis.version_info import PYTHON_VERSION_TRIPLE, IS_PYPY
+        from xdis.version_info import IS_PYPY, PYTHON_VERSION_TRIPLE
 
-        ast = python_parser(PYTHON_VERSION_TRIPLE[:2], co, showasm=True, is_pypy=IS_PYPY)
+        ast = python_parser(
+            PYTHON_VERSION_TRIPLE[:2], co, showasm=True, is_pypy=IS_PYPY
+        )
         print(ast)
         return
+
     parse_test(parse_test.func_code)
diff --git a/uncompyle6/parsers/parse27.py b/uncompyle6/parsers/parse27.py
index 88c0482f..3518e45b 100644
--- a/uncompyle6/parsers/parse27.py
+++ b/uncompyle6/parsers/parse27.py
@@ -38,6 +38,9 @@ class Python27Parser(Python2Parser):
 
         stmt ::= dict_comp_func
 
+        ending_return ::= RETURN_VALUE RETURN_LAST
+        ending_return ::= RETURN_VALUE_LAMBDA LAMBDA_MARKER
+
         dict_comp_func ::= BUILD_MAP_0 LOAD_FAST FOR_ITER store
                            comp_iter JUMP_BACK ending_return
 
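
Note on the ending_return move: the two productions deleted from the common
grammar in uncompyle6/parser.py reappear above in parse27.py and below in
parse3.py, so only the version-specific parsers that use them carry them.
In these SPARK-based parsers, any p_* method contributes the grammar
productions written in its docstring; a stripped-down sketch of that
convention (class and method names here are invented, constructor details
omitted):

    from spark_parser import GenericASTBuilder

    class TinyParser(GenericASTBuilder):
        def p_ending_return(self, args):
            """
            ending_return ::= RETURN_VALUE RETURN_LAST
            ending_return ::= RETURN_VALUE_LAMBDA LAMBDA_MARKER
            """
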
diff --git a/uncompyle6/parsers/parse3.py b/uncompyle6/parsers/parse3.py
index 7019dbac..667010f4 100644
--- a/uncompyle6/parsers/parse3.py
+++ b/uncompyle6/parsers/parse3.py
@@ -27,22 +27,24 @@ that a later phase can turn into a sequence of ASCII text.
 """
 
 import re
-from uncompyle6.scanners.tok import Token
+
+from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG
+
 from uncompyle6.parser import PythonParser, PythonParserSingle, nop_func
 from uncompyle6.parsers.reducecheck import (
     and_invalid,
     except_handler_else,
     ifelsestmt,
-    ifstmt,
     iflaststmt,
+    ifstmt,
     or_check,
     testtrue,
     tryelsestmtl3,
     tryexcept,
-    while1stmt
+    while1stmt,
 )
 from uncompyle6.parsers.treenode import SyntaxTree
-from spark_parser import DEFAULT_DEBUG as PARSER_DEFAULT_DEBUG
+from uncompyle6.scanners.tok import Token
 
 
 class Python3Parser(PythonParser):
@@ -79,6 +81,9 @@ class Python3Parser(PythonParser):
 
         stmt ::= set_comp_func
 
+        ending_return ::= RETURN_VALUE RETURN_LAST
+        ending_return ::= RETURN_VALUE_LAMBDA LAMBDA_MARKER
+
         # TODO this can be simplified
         set_comp_func ::= BUILD_SET_0 LOAD_ARG FOR_ITER store comp_iter
                           JUMP_BACK ending_return
@@ -98,7 +103,7 @@ class Python3Parser(PythonParser):
         """
 
     def p_dict_comp3(self, args):
-        """"
+        """ "
         expr ::= dict_comp
         stmt ::= dict_comp_func
         dict_comp_func ::= BUILD_MAP_0 LOAD_ARG FOR_ITER store
@@ -519,7 +524,7 @@ class Python3Parser(PythonParser):
               expr
               call
              CALL_FUNCTION_3
-        """
+        """
        # FIXME: I bet this can be simplified
        # look for next MAKE_FUNCTION
        j = i
@@ -627,7 +632,11 @@ class Python3Parser(PythonParser):
         self.add_unique_rule(rule, token.kind, uniq_param, customize)
 
         if "LOAD_BUILD_CLASS" in self.seen_ops:
-            if next_token == "CALL_FUNCTION" and next_token.attr == 1 and pos_args_count > 1:
+            if (
+                next_token == "CALL_FUNCTION"
+                and next_token.attr == 1
+                and pos_args_count > 1
+            ):
                 rule = "classdefdeco2 ::= LOAD_BUILD_CLASS mkfunc %s%s_%d" % (
                     ("expr " * (pos_args_count - 1)),
                     opname,
@@ -766,18 +775,24 @@ class Python3Parser(PythonParser):
 
         elif opname in ("BUILD_CONST_LIST", "BUILD_CONST_DICT", "BUILD_CONST_SET"):
             if opname == "BUILD_CONST_DICT":
-                rule = """
+                rule = (
+                    """
                    add_consts ::= ADD_VALUE*
                    const_list ::= COLLECTION_START add_consts %s
                    dict ::= const_list
                    expr ::= dict
-                """ % opname
+                """
+                    % opname
+                )
             else:
-                rule = """
+                rule = (
+                    """
                    add_consts ::= ADD_VALUE*
                    const_list ::= COLLECTION_START add_consts %s
                    expr ::= const_list
-                """ % opname
+                """
+                    % opname
+                )
             self.addRule(rule, nop_func)
 
         elif opname.startswith("BUILD_DICT_OLDER"):
@@ -932,7 +947,6 @@ class Python3Parser(PythonParser):
                 "CALL_FUNCTION_VAR_KW",
             )
         ) or opname.startswith("CALL_FUNCTION_KW"):
-
             if opname == "CALL_FUNCTION" and token.attr == 1:
                 rule = """
                     dict_comp ::= LOAD_DICTCOMP LOAD_STR MAKE_FUNCTION_0 expr
@@ -1108,7 +1122,8 @@ class Python3Parser(PythonParser):
             if has_get_iter_call_function1:
                 rule_pat = (
                     "generator_exp ::= %sload_closure load_genexpr %%s%s expr "
-                    "GET_ITER CALL_FUNCTION_1" % ("pos_arg " * pos_args_count, opname)
+                    "GET_ITER CALL_FUNCTION_1"
+                    % ("pos_arg " * pos_args_count, opname)
                 )
                 self.add_make_function_rule(rule_pat, opname, token.attr, customize)
 
@@ -1194,7 +1209,6 @@ class Python3Parser(PythonParser):
                 )
                 self.add_unique_rule(rule, opname, token.attr, customize)
 
-
             if self.version >= (3, 4):
                 if not self.is_pypy:
                     load_op = "LOAD_STR"
@@ -1278,14 +1292,16 @@ class Python3Parser(PythonParser):
                 if has_get_iter_call_function1:
                     rule_pat = (
                         "generator_exp ::= %sload_genexpr %%s%s expr "
-                        "GET_ITER CALL_FUNCTION_1" % ("pos_arg " * pos_args_count, opname)
+                        "GET_ITER CALL_FUNCTION_1"
+                        % ("pos_arg " * pos_args_count, opname)
                     )
                     self.add_make_function_rule(
                         rule_pat, opname, token.attr, customize
                     )
                     rule_pat = (
                         "generator_exp ::= %sload_closure load_genexpr %%s%s expr "
-                        "GET_ITER CALL_FUNCTION_1" % ("pos_arg " * pos_args_count, opname)
+                        "GET_ITER CALL_FUNCTION_1"
+                        % ("pos_arg " * pos_args_count, opname)
                     )
                     self.add_make_function_rule(
                         rule_pat, opname, token.attr, customize
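
Note on the rule_pat strings in the surrounding hunks: they are two-stage
format templates.  The % substitution runs immediately, while the escaped
%%s survives as a literal %s placeholder for add_make_function_rule to fill
in later.  A worked example with illustrative values:

    pos_args_count = 2
    opname = "MAKE_FUNCTION_0"
    rule_pat = (
        "generator_exp ::= %sload_genexpr %%s%s expr "
        "GET_ITER CALL_FUNCTION_1"
        % ("pos_arg " * pos_args_count, opname)
    )
    # %s -> "pos_arg pos_arg ", %%s -> "%s", and the last %s -> opname:
    assert rule_pat == (
        "generator_exp ::= pos_arg pos_arg load_genexpr "
        "%sMAKE_FUNCTION_0 expr GET_ITER CALL_FUNCTION_1"
    )
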
CALL_FUNCTION_1" + % ("pos_arg " * pos_args_count, opname) ) self.add_make_function_rule( rule_pat, opname, token.attr, customize ) rule_pat = ( "generator_exp ::= %sload_closure load_genexpr %%s%s expr " - "GET_ITER CALL_FUNCTION_1" % ("pos_arg " * pos_args_count, opname) + "GET_ITER CALL_FUNCTION_1" + % ("pos_arg " * pos_args_count, opname) ) self.add_make_function_rule( rule_pat, opname, token.attr, customize @@ -1337,7 +1353,8 @@ class Python3Parser(PythonParser): if has_get_iter_call_function1: rule_pat = ( "generator_exp ::= %sload_genexpr %%s%s expr " - "GET_ITER CALL_FUNCTION_1" % ("pos_arg " * pos_args_count, opname) + "GET_ITER CALL_FUNCTION_1" + % ("pos_arg " * pos_args_count, opname) ) self.add_make_function_rule(rule_pat, opname, token.attr, customize) @@ -1349,7 +1366,8 @@ class Python3Parser(PythonParser): # Todo: For Pypy we need to modify this slightly rule_pat = ( "listcomp ::= %sLOAD_LISTCOMP %%s%s expr " - "GET_ITER CALL_FUNCTION_1" % ("expr " * pos_args_count, opname) + "GET_ITER CALL_FUNCTION_1" + % ("expr " * pos_args_count, opname) ) self.add_make_function_rule( rule_pat, opname, token.attr, customize @@ -1580,7 +1598,7 @@ class Python3Parser(PythonParser): } if self.version == (3, 6): - self.reduce_check_table["and"] = and_invalid + self.reduce_check_table["and"] = and_invalid self.check_reduce["and"] = "AST" self.check_reduce["annotate_tuple"] = "noAST" @@ -1610,7 +1628,7 @@ class Python3Parser(PythonParser): def reduce_is_invalid(self, rule, ast, tokens, first, last): lhs = rule[0] n = len(tokens) - last = min(last, n-1) + last = min(last, n - 1) fn = self.reduce_check_table.get(lhs, None) if fn: if fn(self, lhs, n, rule, ast, tokens, first, last): @@ -1636,13 +1654,18 @@ class Python3Parser(PythonParser): condition_jump2 = tokens[min(last - 1, len(tokens) - 1)] # If there are two *distinct* condition jumps, they should not jump to the # same place. Otherwise we have some sort of "and"/"or". - if condition_jump2.kind.startswith("POP_JUMP_IF") and condition_jump != condition_jump2: + if ( + condition_jump2.kind.startswith("POP_JUMP_IF") + and condition_jump != condition_jump2 + ): return condition_jump.attr == condition_jump2.attr - if tokens[last] == "COME_FROM" and tokens[last].off2int() != condition_jump.attr: + if ( + tokens[last] == "COME_FROM" + and tokens[last].off2int() != condition_jump.attr + ): return False - # if condition_jump.attr < condition_jump2.off2int(): # print("XXX", first, last) # for t in range(first, last): print(tokens[t]) @@ -1664,7 +1687,6 @@ class Python3Parser(PythonParser): < tokens[last].off2int() ) elif lhs == "while1stmt": - if while1stmt(self, lhs, n, rule, ast, tokens, first, last): return True @@ -1686,7 +1708,6 @@ class Python3Parser(PythonParser): return True return False elif lhs == "while1elsestmt": - n = len(tokens) if last == n: # Adjust for fuzziness in parsing